diff customize_metaphlan_database.xml @ 0:c0473c69ac9f draft

planemo upload for repository https://github.com/quadram-institute-bioscience/galaxy-tools/tree/master/tools/metaphlan/
author thanhlv
date Mon, 13 Feb 2023 11:36:16 +0000
parents
children b6e5df1237f2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/customize_metaphlan_database.xml	Mon Feb 13 11:36:16 2023 +0000
@@ -0,0 +1,281 @@
+<tool id="customize_metaphlan_database" name="Customize the marker sequences and metadata" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>from the MetaPhlAn database</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements">
+        <requirement type="package" version="1.3">seqtk</requirement>
+    </expand>
+    <version_command>metaphlan -v</version_command>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/customizemetadata.py'
+    $op.operation
+
+#if $op.operation == "add_marker"
+    --in_json '$in_json'
+    --out_json '$out_json'
+    --name $op.name
+    --m_length $op.m_length
+        #for $s in $op.genomes
+    --g_length $s.g_length
+            #if str($s.gca) != ''
+    --gca '$s.gca'
+            #else
+    --gca ''
+            #end if
+    --k_name '$s.k_name'
+    --k_id $s.k_id
+    --p_name '$s.p_name'
+    --p_id $s.p_id
+    --c_name '$s.c_name'
+    --c_id $s.c_id
+    --o_name '$s.o_name'
+    --o_id $s.o_id
+    --f_name '$s.f_name'
+    --f_id $s.f_id
+    --g_name '$s.g_name'
+    --g_id $s.g_id
+    --s_name '$s.s_name'
+    --s_id $s.s_id
+    --t_name '$s.t_name'
+        #end for
+&&
+cat 
+    '$in_fasta'
+    '$op.marker_seq'
+    > '$out_fasta'
+
+#else if $op.operation == "remove_markers"
+    --in_json '$in_json'
+    --markers '$op.markers'
+    --out_json '$out_json'
+    --kept_markers 'kept_makers'
+&&
+seqtk subseq
+    '$in_fasta'
+    'kept_makers'
+    > '$out_fasta'
+
+#else if $op.operation == "keep_markers"
+    --in_json '$in_json'
+    --markers '$op.markers'
+    --out_json '$out_json'
+&&
+seqtk subseq
+    '$in_fasta'
+    '$op.markers'
+    > '$out_fasta'
+#end if
+    ]]></command>
+    <inputs>
+        <param name="in_fasta" type="data" format="fasta" label="Marker sequences"/>
+        <param argument="--in_json" type="data" format="json" label="Marker metadata"/>
+        <conditional name="op">
+            <param name="operation" type="select" label="Customization">
+                <option value="add_marker" selected="true">Add new marker</option>
+                <option value="remove_markers">Remove markers</option>
+                <option value="keep_markers">Keep markers, others will be removed</option>
+            </param>
+            <when value="add_marker">
+                <param name="marker_seq" type="data" format="fasta" label="Sequences of the new markers"/>
+                <param argument="--name" type="text" label="Name of the new marker"/>
+                <param argument="--m_length" type="integer" value="" label="Length of the new marker"/>
+                <repeat name="genomes" min="1" title="Taxonomy of the genomes from which the new marker has been extracted">
+                    <param argument="--g_length" type="integer" min="0" value="" label="Length of the genome"/>
+                    <param argument="--gca" type="text" optional="true" label="GenBank assemblies id (GCA) of the genome"/>
+                    <param argument="--k_name" type="text" label="Kingdom: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--k_id" type="integer" min="0" value="" label="Kingdom: NCBI id"/>
+                    <param argument="--p_name" type="text" label="Phylum: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--p_id" type="integer" min="0" value="" label="Phylum: NCBI id"/>
+                    <param argument="--c_name" type="text" label="Class: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--c_id" type="integer" min="0" value="" label="Class: NCBI id"/>
+                    <param argument="--o_name" type="text" label="Order: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--o_id" type="integer" min="0" value="" label="Order: NCBI id"/>
+                    <param argument="--f_name" type="text" label="Family: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--f_id" type="integer" min="0" value="" label="Family: NCBI id"/>
+                    <param argument="--g_name" type="text" label="Genus: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--g_id" type="integer" min="0" value="" label="Genus: NCBI id"/>
+                    <param argument="--s_name" type="text" label="Species: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                    <param argument="--s_id" type="integer" min="0" value="" label="Species: NCBI id"/>
+                    <param argument="--t_name" type="text" label="Strain: Name">
+                        <sanitizer invalid_char="">
+                            <valid initial="string.ascii_letters,string.digits">
+                                <add value="_" />
+                            </valid>
+                        </sanitizer>
+                    </param>
+                </repeat>
+            </when>
+            <when value="remove_markers">
+                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to remove" help="1 marker per line"/>
+            </when>
+            <when value="keep_markers">
+                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to keep" help="1 marker per line"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_fasta" format="fasta" label="${tool.name} on ${on_string}: Markers sequences" />
+        <data name="out_json" format="json" label="${tool.name} on ${on_string}: Marker metadata" />
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db-without-one-marker.fasta"/>
+            <param name="in_json" value="test-db-without-one-marker.json"/>
+            <conditional name="op">
+                <param name="operation" value="add_marker"/>
+                <param name="marker_seq" value="marker_sequence.fasta"/>
+                <param name="name" value="13076__A0A2I1PE66__CYJ72_10760"/>
+                <param name="m_length" value="540"/>
+                <repeat name="genomes">
+                    <param name="g_length" value="2411251"/>
+                    <param name="gca" value="GCA_002847845"/>
+                    <param name="k_name" value="Bacteria"/>
+                    <param name="k_id" value="2"/>
+                    <param name="p_name" value="Bacilli"/>
+                    <param name="p_id" value="1239"/>
+                    <param name="c_name" value="Negativicutes"/>
+                    <param name="c_id" value="91061"/>
+                    <param name="o_name" value="Lactobacillales"/>
+                    <param name="o_id" value="186826"/>
+                    <param name="f_name" value="Aerococcaceae"/>
+                    <param name="f_id" value="186827"/>
+                    <param name="g_name" value="Globicatella"/>
+                    <param name="g_id" value="13075"/>
+                    <param name="s_name" value="Globicatella_sanguinis"/>
+                    <param name="s_id" value="13076"/>
+                    <param name="t_name" value="GCA_002847845"/>
+                </repeat>
+            </conditional>
+            <output name="out_fasta" file="test-db.fasta" compare="sim_size">
+              <assert_contents>   
+                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents>
+            </output>
+            <output name="out_json" file="test-db.json" compare="sim_size">
+              <assert_contents>
+                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db.fasta"/>
+            <param name="in_json" value="test-db.json"/>
+            <conditional name="op">
+                <param name="operation" value="remove_markers"/>
+                <param name="markers" value="marker.txt"/>
+            </conditional>
+            <output name="out_fasta" file="test-db-without-one-marker.fasta" compare="sim_size">
+              <assert_contents>
+                <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents>
+            </output>
+            <output name="out_json" file="test-db-without-one-marker.json" compare="sim_size">
+              <assert_contents>
+                <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <param name="in_fasta" value="test-db.fasta"/>
+            <param name="in_json" value="test-db.json"/>
+            <conditional name="op">
+                <param name="operation" value="keep_markers"/>
+                <param name="markers" value="marker.txt"/>
+            </conditional>
+            <output name="out_fasta" file="test-db-with-one-marker.fasta" compare="sim_size">
+              <assert_contents>
+                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents>
+            </output>
+            <output name="out_json" file="test-db-with-one-marker.json" compare="sim_size">
+              <assert_contents>
+                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
+              </assert_contents> 
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+What it does
+============
+
+Customize the marker sequences (in fasta format) and metadata (in JSON) for a MetaPhlAn database:
+
+- Add marker
+- Remove markers
+- Keep markers
+
+Inputs
+======
+
+MetaphlAn database (can be extracted with dedicated tool)
+
+- Fasta file with marker sequences
+- JSON file with marker metadata
+
+The other inputs depends on the type of customization
+
+- Add marker
+    - Fasta file with the sequence of new marker
+    - Information about the new marker and related genomes
+
+- Remove markers
+    - File with list of markers to remove
+
+- Keep markers
+    - File with list of markers to keep
+
+Outputs
+=======
+
+Customized database (that can be used as input for MetaphlAn tool)
+
+- Fasta file with marker sequences
+- JSON file with marker metadata
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>