Mercurial > repos > iuc > picrust2_place_seqs

<tool id="picrust2_place_seqs" name="PICRUSt2 Sequence placement" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short phrase shown next to the tool name. -->
    <description>into reference tree</description>
    <!-- Shared macros and tokens used throughout this wrapper are imported from
         macros.xml, which is not part of this file. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- NOTE(review): "bio_tool" and "requirements" are expanded from macros.xml;
         presumably they supply the bio.tools cross-reference and the package
         requirements. Confirm against macros.xml. -->
    <expand macro="bio_tool"/>
    <expand macro="requirements"/>
    <!-- Command used to report the version of the wrapped place_seqs.py script. -->
    <version_command>place_seqs.py -v</version_command>
    <!-- Cheetah template for the job command line. A non-zero exit code marks the
         job as failed. Tokens wrapped in "@" signs come from macros.xml. -->
    <command detect_errors="exit_code"><![CDATA[
## NOTE(review): the two tokens below are defined in macros.xml and are not
## visible here; presumably they prepare template variables and stage the
## input/reference files -- confirm against macros.xml.
@VAR_ACCESS_FOO@
@PLACE_SEQS_PREPROCESSING@

## Create the directory that receives the intermediate files, but only when
## the user asked to keep them.
#if $intermediate_check
    mkdir intermediate &&
#end if

## The backslash in the --processes value keeps Cheetah from interpreting the
## variable, so the shell expands it at run time (1 if GALAXY_SLOTS is unset).
place_seqs.py
    @PLACE_SEQS_PARAMS@
    --out_tree '$out_tree'
    --processes "\${GALAXY_SLOTS:-1}"
#if $intermediate_check
    --intermediate 'intermediate/place_seqs'
#end if
## not implemented --chunk_size hoping that the default is carefully chosen
## otherwise one might need to compute a "good" value from the input size
## and the number of available processors
    ]]></command>
    <inputs>
        <!-- Placement parameters shared via macros.xml (not visible in this file). -->
        <expand macro="place_seqs_params"/>
        <!-- Toggle evaluated by the command template and by the output filter.
             The parameter name is kept as "intermediate_check" (tests and the
             template refer to it), while "argument" names the option that the
             template actually passes to place_seqs.py when the toggle is on. -->
        <param name="intermediate_check" argument="--intermediate" type="boolean" truevalue="intermediate_check" falsevalue="" checked="false" label="Keep intermediate files" help="Intermediate output files will be deleted by default"/>
    </inputs>
    <outputs>
        <!-- Outputs are declared by the "place_seqs_output" macro. The filter below
             is injected wherever that macro yields; the tests expect two outputs
             with the toggle on and one with it off, so presumably it guards the
             intermediate-file collection. Confirm against macros.xml.
             NOTE(review): from_work_dir is handed to the macro as a token value. -->
        <expand macro="place_seqs_output" from_work_dir="./">
            <filter>intermediate_check is True</filter>
        </expand>
    </outputs>
    <tests>
        <!-- SEPP placement with the default reference while keeping intermediate
             files: expects the tree plus a two-element collection of intermediates. -->
        <test expect_num_outputs="2">
            <param name="study_fasta" ftype="fasta" value="study_seqs_test.fasta"/>
            <param name="placement_tool" value="sepp"/>
            <param name="intermediate_check" value="true"/>
            <param name="min_align" value="0.80"/>
            <output name="out_tree" ftype="newick">
                <assert_contents>
                    <has_text text="643348582"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output_collection name="place_seqs_intermediate_output" type="list" count="2">
                <element name="query_align" ftype="stockholm">
                    <assert_contents>
                        <has_text text="STOCKHOLM 1.0"/>
                        <has_n_lines n="160106"/>
                    </assert_contents>
                </element>
                <element name="study_seqs_filtered" ftype="fasta">
                    <assert_contents>
                        <has_text text="02905cfb87861c837dde629596d9272b"/>
                        <has_n_lines n="10"/>
                    </assert_contents>
                </element>
            </output_collection>
            <assert_command>
                <!-- The default prokaryotic reference must be selected by omitting
                     the ref_dir option entirely, i.e. by relying on the program's
                     default, so neither the path nor the option may appear. -->
                <has_text text="prokaryotic/pro_ref/" negate="true"/>
                <has_text text="--ref_dir" negate="true"/>
            </assert_command>
        </test>
        <!-- Command-line construction test for EPA-NG with the fungal ITS reference.
             The input data does not really work with the fungal reference, so the
             job is expected to fail (exit code 1) with the "aligned poorly" message
             on stderr; that is sufficient to check the generated command line.
             The min_align value 0.10 is expected to show up as 0.1 in the command. -->
        <test expect_exit_code="1" expect_failure="true">
            <param name="study_fasta" ftype="fasta" value="study_seqs_test.fasta"/>
            <param name="placement_tool" value="epa-ng"/>
            <param name="min_align" value="0.10"/>
            <conditional name="ref_dir">
                <param name="selector" value="fungi/fungi_ITS/"/>
            </conditional>
            <assert_stderr>
                <has_text text="all 5 input sequences aligned poorly to reference sequences"/>
            </assert_stderr>
            <assert_command>
                <has_text text="fungi/fungi_ITS/"/>
                <has_text text="--min_align 0.1"/>
            </assert_command>
        </test>
        <!-- EPA-NG placement against a user-supplied (custom) reference: all four
             reference files are provided and intermediate files are not kept, so
             only the tree output is expected. -->
        <test expect_num_outputs="1">
            <param name="study_fasta" value="study_seqs_test2.fasta"/>
            <param name="placement_tool" value="epa-ng"/>
            <param name="min_align" value="0.80"/>
            <param name="intermediate_check" value="false"/>
            <conditional name="ref_dir">
                <param name="selector" value="custom"/>
                <param name="custom_fna" value="img_centroid_16S_aligned_head30.fna.gz"/>
                <param name="custom_hmm" value="img_centroid_16S_aligned_head30.hmm"/>
                <param name="custom_tre" value="img_centroid_16S_aligned_head30.tre"/>
                <param name="custom_model" value="img_centroid_16S_aligned_head30.model"/>
            </conditional>
            <output name="out_tree" ftype="newick">
                <assert_contents>
                    <has_text text="2511231175_test"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <assert_command>
                <!-- The custom reference is expected to be referenced via a "custom/" path. -->
                <has_text text="custom/"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Place Seqs (Sequence placement)
===============================
PICRUSt2 wraps HMMER to place study sequences into a reference multiple-sequence alignment and then places these sequences into the reference phylogeny with EPA-NG or SEPP. The "study sequences" referred to will be the representative OTUs and/or ASVs under the typical workflow. The tool GAPPA is used to convert the resulting .jplace object into newick format.

Note
====
This is typically done to prepare for subsequent hidden-state prediction with PICRUSt2. Requires an unaligned FASTA file of study sequences. Users can specify non-default reference files if needed.

Input
=====
The study sequences (i.e. FASTA of amplicon sequence variants or operational taxonomic units)

Output
======
Output tree with placed study sequences.

    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41587-020-0548-6</citation>
    </citations>
</tool>
author	iuc
date	Tue, 13 Aug 2024 12:11:37 +0000
parents	680fb3eb574c
children