view fastk.xml @ 9:8779f4d2d04d draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastk commit de98c1736ab624bff379c3e83ccfc19cb2995684
author iuc
date Wed, 19 Feb 2025 11:17:00 +0000
parents 33079540e88f
children
line wrap: on
line source

<tool id="fastk_fastk" name="FastK" version="@TOOL_VERSION@+galaxy2" profile="23.2">
    <description>A k-mer counter for high-quality assembly datasets</description>
    <!-- Shared tokens and macros are imported from macros.xml; the xrefs and
         requirements macros expanded below are expected to be defined there. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!--
        Command outline:
          1. symlink the input dataset under a name with a predictable suffix,
          2. run FastK (k-mer size, optional sorted table, thread count from GALAXY_SLOTS),
          3. when a sorted table was requested, dump it with Tabex and collect the
             ktab stub plus its hidden part files into ktabfiles/,
          4. archive ktabfiles/ as fastk.tar.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir -p ktabfiles &&
        ## Every fastq / fastq.gz subtype is linked under a plain suffix; any other
        ## datatype keeps its Galaxy extension as the suffix.
        #if $infile.is_of_type("fastq"):
            #set INPUTFILE="input.fastq"
        #elif $infile.is_of_type("fastq.gz"):
            #set INPUTFILE="input.fastq.gz"
        #else
            #set INPUTFILE="input."+$infile.ext
        #end if
        ln -s '$infile' $INPUTFILE &&
        FastK $INPUTFILE
        -k$kmer_size
        #if $sorted_table.sorted_table_option == 'yes_with_default':
            -t
        #elif $sorted_table.sorted_table_option == 'yes_with_custom':
            ## The cutoff lives inside the sorted_table conditional, so it is addressed through it.
            -t${sorted_table.sorted_table_cutoff}
        #end if
        -T\${GALAXY_SLOTS:-8} -Noutput -Pktabfiles
        #if $sorted_table.sorted_table_option == 'yes_with_default':
            && Tabex output.ktab -t${sorted_table.tabex_threshold_for_default} LIST > '$tabex_hist'
            ## Hidden part files first (a bare * never matches a leading dot), then the visible stub.
            && cp .*.ktab* ktabfiles/
            && cp *.ktab ktabfiles/
        #elif $sorted_table.sorted_table_option == 'yes_with_custom':
            && Tabex output.ktab -t${sorted_table.tabex_threshold_for_custom} LIST > '$tabex_hist'
            ## Same collection step as the default branch: hidden part files, then the stub.
            && cp .*.ktab* ktabfiles/
            && cp *.ktab ktabfiles/
        #end if
        && tar -cf fastk.tar ktabfiles/
    ]]></command>
    <inputs>
        <!-- Sequence data to count k-mers in; the command links it under a suffix derived from its datatype. -->
        <param name="infile" type="data" format="fasta,fasta.gz,fastq,fastq.gz,cram,unsorted.bam,sam" label="Input file"/>
        <param name="kmer_size" argument="-k" type="integer" min="5" max="50" value="40" label="Enter desired k-mer size" help="Default: 40"/>
        <!-- Controls whether FastK is run with -t and whether Tabex is run on the resulting table. -->
        <conditional name="sorted_table">
            <param name="sorted_table_option" type="select" label="Sort table" help="Do you want a sorted table of all canonical k-mers and their counts? The sorted table is sorted lexicographically on the k-mer where a &lt; c &lt; g &lt; t.">
                <option value="no">No</option>
                <option value="yes_with_default">Yes, Default sorted</option>
                <option value="yes_with_custom">Yes, Custom sorted</option>
            </param>
            <when value="no"/>
            <when value="yes_with_default">
                <param name="tabex_threshold_for_default" argument="-t" type="integer" value="5" min="1" label="Tabex count threshold" help="Trim all k-mers with counts less than threshold"/>
            </when>
            <when value="yes_with_custom">
                <!-- Passed to FastK as -t<value>; the Tabex threshold below is a separate setting. -->
                <param name="sorted_table_cutoff" argument="-t" type="integer" min="2" value="10" label="Enter sorted table cutoff value" help="Only k-mers that occur at least this many times are written to the sorted table"/>
                <param name="tabex_threshold_for_custom" argument="-t" type="integer" value="5" min="1" label="Tabex count threshold" help="Trim all k-mers with counts less than threshold"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Always collected: the tar archive of ktabfiles/ and the histogram written by FastK. -->
        <data name="fastk_out"
              format="fastk_ktab_tar"
              from_work_dir="fastk.tar"
              label="${tool.name} on ${on_string}: FastK files"/>
        <data name="fastk_hist_out"
              format="fastk_hist"
              from_work_dir="output.hist"
              label="${tool.name} on ${on_string}: FastK hist"/>
        <!-- Only collected when a sorted table was requested: the ktab stub and the Tabex listing. -->
        <data name="fastk_ktab_out"
              format="fastk_ktab"
              from_work_dir="ktabfiles/output.ktab"
              label="${tool.name} on ${on_string}: FastK ktab">
            <filter> sorted_table['sorted_table_option'] != 'no' </filter>
        </data>
        <data name="tabex_hist"
              format="txt"
              label="${tool.name} on ${on_string}: Tabex output">
            <filter> sorted_table['sorted_table_option'] != 'no' </filter>
        </data>
    </outputs>
    <tests>
        <!-- TEST 1: no sorted table; only the histogram and the tar archive are produced. -->
        <test expect_num_outputs="2">
            <param name="infile" value="input01.fasta.gz"/>
            <param name="kmer_size" value="40"/>
            <output name="fastk_hist_out" file="test01.hist" ftype="fastk_hist"/>
            <output name="fastk_out" ftype="fastk_ktab_tar">
                <assert_contents>
                    <has_archive_member path="ktabfiles"/>
                </assert_contents>
            </output>
        </test>
        <!-- TEST 2: sorted table with the default FastK cutoff and default Tabex threshold. -->
        <test expect_num_outputs="4">
            <param name="infile" value="input01.fasta.gz"/>
            <conditional name="sorted_table">
                <param name="sorted_table_option" value="yes_with_default"/>
            </conditional>
            <output name="fastk_hist_out" file="test02.hist" ftype="fastk_hist"/>
            <output name="fastk_out" ftype="fastk_ktab_tar">
                <assert_contents>
                    <has_archive_member path="ktabfiles/output.ktab"/>
                    <!-- The archive must also carry the hidden part files that the command copies next to the stub. -->
                    <has_archive_member path="ktabfiles/\..*\.ktab.*"/>
                </assert_contents>
            </output>
            <output name="tabex_hist" file="test02.tabex.txt"/>
        </test>
        <!-- TEST 3: sorted table with a user-supplied FastK cutoff. -->
        <test expect_num_outputs="4">
            <param name="infile" value="input01.fasta.gz"/>
            <conditional name="sorted_table">
                <param name="sorted_table_option" value="yes_with_custom"/>
                <param name="sorted_table_cutoff" value="5"/>
            </conditional>
            <output name="fastk_hist_out" file="test03.hist" ftype="fastk_hist"/>
            <output name="fastk_out" ftype="fastk_ktab_tar">
                <assert_contents>
                    <has_archive_member path="ktabfiles/output.ktab"/>
                </assert_contents>
            </output>
            <output name="tabex_hist" file="test03.tabex.txt"/>
        </test>
    </tests>
    <help><![CDATA[
        FastK is a k‑mer counter that is optimized for processing high quality DNA assembly data sets such as those produced with an Illumina instrument or a PacBio run in HiFi mode.

        The input data can be in CRAM, BAM, SAM, fasta, or fastq files.
    
        FastK produces the following outputs:

        1. FastK hist: file in binary format containing histogram information detailing the frequency of occurrence for each k‑mer within the dataset.

        2. A Tabex txt file comprising a table of k‑mer/count pairs, sorted lexicographically on the k‑mer sequence, following the order a < c < g < t.

        3. A tar file containing the hidden .ktab files that can be used by downstream FastK tools.

    ]]></help>
    <expand macro="citations"/>
</tool>