view customize_metaphlan_database.xml @ 0:a6c3f4f439f7 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 1e543a44ceffd8e4c5537b9015606ab3b90a114c"
author iuc
date Mon, 19 Apr 2021 20:56:51 +0000
parents
children 487da152fb43
line wrap: on
line source

<!-- Galaxy tool wrapper: adds a marker to, removes markers from, or keeps only selected markers in a MetaPhlAn marker database (FASTA sequences plus JSON metadata). -->
<tool id="customize_metaphlan_database" name="Customize the marker sequences and metadata" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>from the MetaPhlAn database</description>
    <!-- The @...@ tokens above and the macros expanded below are presumably defined in macros.xml, which is not part of this file; confirm there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <!-- seqtk is added on top of the shared requirements because the command section calls "seqtk subseq" to filter the FASTA file. -->
    <expand macro="requirements">
        <requirement type="package" version="1.3">seqtk</requirement>
    </expand>
    <!-- Command whose output is reported as the version of the underlying software. -->
    <version_command>metaphlan -v</version_command>
    <command detect_errors="aggressive"><![CDATA[
## Every operation runs in two chained steps:
##   1. customizemetadata.py rewrites the JSON metadata (the operation name is
##      passed as the first positional argument);
##   2. a shell command (cat or seqtk subseq) produces the matching FASTA file.
python '$__tool_directory__/customizemetadata.py'
    $op.operation

## add_marker: register one new marker together with the taxonomy of every
## genome listed in the "genomes" repeat, then append its sequence.
#if $op.operation == "add_marker"
    --in_json '$in_json'
    --out_json '$out_json'
## The marker name is free text: it is quoted so that a value containing
## spaces (or an empty value) is still passed as exactly one argument.
    --name '$op.name'
    --m_length $op.m_length
        #for $s in $op.genomes
    --g_length $s.g_length
## The GCA id is optional; an explicit empty string is passed when it is not
## set so that the option is still given once per genome.
            #if str($s.gca) != ''
    --gca '$s.gca'
            #else
    --gca ''
            #end if
    --k_name '$s.k_name'
    --k_id $s.k_id
    --p_name '$s.p_name'
    --p_id $s.p_id
    --c_name '$s.c_name'
    --c_id $s.c_id
    --o_name '$s.o_name'
    --o_id $s.o_id
    --f_name '$s.f_name'
    --f_id $s.f_id
    --g_name '$s.g_name'
    --g_id $s.g_id
    --s_name '$s.s_name'
    --s_id $s.s_id
    --t_name '$s.t_name'
        #end for
&&
cat 
    '$in_fasta'
    '$op.marker_seq'
    > '$out_fasta'

## remove_markers: the script is given a path ("kept_markers", in the job
## working directory) through its kept_markers option; that file is then used
## as the name list for seqtk subseq.
#else if $op.operation == "remove_markers"
    --in_json '$in_json'
    --markers '$op.markers'
    --out_json '$out_json'
    --kept_markers 'kept_markers'
&&
seqtk subseq
    '$in_fasta'
    'kept_markers'
    > '$out_fasta'

## keep_markers: the user-supplied list is itself the name list for seqtk.
#else if $op.operation == "keep_markers"
    --in_json '$in_json'
    --markers '$op.markers'
    --out_json '$out_json'
&&
seqtk subseq
    '$in_fasta'
    '$op.markers'
    > '$out_fasta'
#end if
    ]]></command>
    <inputs>
        <!-- The database to customize: marker sequences (FASTA) and the matching marker metadata (JSON). -->
        <param name="in_fasta" type="data" format="fasta" label="Marker sequences"/>
        <param argument="--in_json" type="data" format="json" label="Marker metadata"/>
        <!-- The selected operation decides which extra inputs are shown and which branch of the command template is rendered. -->
        <conditional name="op">
            <param name="operation" type="select" label="Customization">
                <option value="add_marker" selected="true">Add new marker</option>
                <option value="remove_markers">Remove markers</option>
                <option value="keep_markers">Keep markers, others will be removed</option>
            </param>
            <when value="add_marker">
                <param name="marker_seq" type="data" format="fasta" label="Sequences of the new markers"/>
                <param argument="--name" type="text" label="Name of the new marker"/>
                <!-- min="0" mirrors the other integer parameters of this section, so a negative length is rejected in the form. -->
                <param argument="--m_length" type="integer" min="0" value="" label="Length of the new marker"/>
                <!-- One entry per genome carrying the marker (at least one). Each taxonomic name is restricted to ASCII letters, digits and "_"; any other character is dropped (invalid_char=""). -->
                <repeat name="genomes" min="1" title="Taxonomy of the genomes from which the new marker has been extracted">
                    <param argument="--g_length" type="integer" min="0" value="" label="Length of the genome"/>
                    <param argument="--gca" type="text" optional="true" label="GenBank assemblies id (GCA) of the genome"/>
                    <param argument="--k_name" type="text" label="Kingdom: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--k_id" type="integer" min="0" value="" label="Kingdom: NCBI id"/>
                    <param argument="--p_name" type="text" label="Phylum: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--p_id" type="integer" min="0" value="" label="Phylum: NCBI id"/>
                    <param argument="--c_name" type="text" label="Class: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--c_id" type="integer" min="0" value="" label="Class: NCBI id"/>
                    <param argument="--o_name" type="text" label="Order: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--o_id" type="integer" min="0" value="" label="Order: NCBI id"/>
                    <param argument="--f_name" type="text" label="Family: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--f_id" type="integer" min="0" value="" label="Family: NCBI id"/>
                    <param argument="--g_name" type="text" label="Genus: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--g_id" type="integer" min="0" value="" label="Genus: NCBI id"/>
                    <param argument="--s_name" type="text" label="Species: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param argument="--s_id" type="integer" min="0" value="" label="Species: NCBI id"/>
                    <!-- The strain level has a name only; there is no matching NCBI id parameter. -->
                    <param argument="--t_name" type="text" label="Strain: Name">
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits">
                                <add value="_" />
                            </valid>
                        </sanitizer>
                    </param>
                </repeat>
            </when>
            <!-- Both list-based operations take the same kind of input: a text file with one marker name per line. -->
            <when value="remove_markers">
                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to remove" help="1 marker per line"/>
            </when>
            <when value="keep_markers">
                <param argument="--markers" type="data" format="tabular,txt" label="List of markers to keep" help="1 marker per line"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Customized marker sequences: the file the cat / seqtk step of the command redirects into. -->
        <data format="fasta" name="out_fasta" label="${tool.name} on ${on_string}: Markers sequences"/>
        <!-- Customized marker metadata: the file handed to customizemetadata.py as its out_json argument. -->
        <data format="json" name="out_json" label="${tool.name} on ${on_string}: Marker metadata"/>
    </outputs>
    <tests>
        <!-- add_marker: start from the database lacking one marker and add it back. Outputs are compared by size only, plus a check that the marker name is present. -->
        <test expect_num_outputs="2">
            <param name="in_fasta" value="test-db-without-one-marker.fasta"/>
            <param name="in_json" value="test-db-without-one-marker.json"/>
            <conditional name="op">
                <param name="operation" value="add_marker"/>
                <param name="marker_seq" value="marker_sequence.fasta"/>
                <param name="name" value="13076__A0A2I1PE66__CYJ72_10760"/>
                <param name="m_length" value="540"/>
                <repeat name="genomes">
                    <param name="g_length" value="2411251"/>
                    <param name="gca" value="GCA_002847845"/>
                    <param name="k_name" value="Bacteria"/>
                    <param name="k_id" value="2"/>
                    <!-- Names match their NCBI taxonomy ids: 1239 is the phylum Firmicutes, 91061 is the class Bacilli. -->
                    <param name="p_name" value="Firmicutes"/>
                    <param name="p_id" value="1239"/>
                    <param name="c_name" value="Bacilli"/>
                    <param name="c_id" value="91061"/>
                    <param name="o_name" value="Lactobacillales"/>
                    <param name="o_id" value="186826"/>
                    <param name="f_name" value="Aerococcaceae"/>
                    <param name="f_id" value="186827"/>
                    <param name="g_name" value="Globicatella"/>
                    <param name="g_id" value="13075"/>
                    <param name="s_name" value="Globicatella_sanguinis"/>
                    <param name="s_id" value="13076"/>
                    <param name="t_name" value="GCA_002847845"/>
                </repeat>
            </conditional>
            <output name="out_fasta" file="test-db.fasta" compare="sim_size">
                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
            <output name="out_json" file="test-db.json" compare="sim_size">
                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
        </test>
        <!-- remove_markers: the marker listed in marker.txt must be absent from both outputs. -->
        <test expect_num_outputs="2">
            <param name="in_fasta" value="test-db.fasta"/>
            <param name="in_json" value="test-db.json"/>
            <conditional name="op">
                <param name="operation" value="remove_markers"/>
                <param name="markers" value="marker.txt"/>
            </conditional>
            <output name="out_fasta" file="test-db-without-one-marker.fasta" compare="sim_size">
                <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
            <output name="out_json" file="test-db-without-one-marker.json" compare="sim_size">
                <not_has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
        </test>
        <!-- keep_markers: the marker listed in marker.txt must still be present in both outputs. -->
        <test expect_num_outputs="2">
            <param name="in_fasta" value="test-db.fasta"/>
            <param name="in_json" value="test-db.json"/>
            <conditional name="op">
                <param name="operation" value="keep_markers"/>
                <param name="markers" value="marker.txt"/>
            </conditional>
            <output name="out_fasta" file="test-db-with-one-marker.fasta" compare="sim_size">
                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
            <output name="out_json" file="test-db-with-one-marker.json" compare="sim_size">
                <has_text text="13076__A0A2I1PE66__CYJ72_10760" />
            </output>
        </test>
    </tests>
    <help><![CDATA[
What it does
============

Customize the marker sequences (in fasta format) and metadata (in JSON) for a MetaPhlAn database:

- Add marker
- Remove markers
- Keep markers

Inputs
======

MetaPhlAn database (can be extracted with the dedicated tool)

- Fasta file with marker sequences
- JSON file with marker metadata

The other inputs depend on the type of customization

- Add marker
    - Fasta file with the sequence of new marker
    - Information about the new marker and related genomes

- Remove markers
    - File with list of markers to remove

- Keep markers
    - File with list of markers to keep

Outputs
=======

Customized database (that can be used as input for the MetaPhlAn tool)

- Fasta file with marker sequences
- JSON file with marker metadata

    ]]></help>
    <expand macro="citations"/>
</tool>