view fargene.xml @ 0:6f743c615c41 draft

"planemo upload for repository commit 867e4a6fad4c2622ad69517e2d4d9ba185109b72"
author iuc
date Thu, 28 Nov 2019 14:39:41 -0500
children 239ce9f24386
line wrap: on
line source

<tool id="fargene" name="fargene" version="@VERSION@">
    <description>Fragmented antibiotic resistance gene identifier </description>
    <expand macro="requirements" />
    <version_command>fargene --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
    ## ------------------------------------------------------------------
    ## 1. Stage the inputs.
    ##    Every dataset is symlinked into the working directory under a
    ##    shell-safe name: any character other than a word character,
    ##    '-', '_' or '.' in the element identifier is replaced by '_'.
    ##    Reads get a .fastq suffix and contigs/genomes a .fasta suffix so
    ##    that the invocation below can select them by extension.
    ## ------------------------------------------------------------------
    #import re
    #if $inputs.input_type == 'paired':
        #set $safename_R1 = re.sub('[^\w\-_\.]', '_', $inputs.R1.element_identifier)
        #set $safename_R2 = re.sub('[^\w\-_\.]', '_', $inputs.R2.element_identifier)
        ln -fs '$inputs.R1' ${safename_R1}.fastq &&
        ln -fs '$inputs.R2' ${safename_R2}.fastq &&
    #elif $inputs.input_type == 'collection':
        #for $pair in $inputs.input_collection
            ## Both mates share the pair identifier; _1/_2 tells them apart.
            #set $safename_pair = re.sub('[^\w\-_\.]', '_', $pair.element_identifier)
            ln -fs '${pair.forward}' ${safename_pair}_1.fastq &&
            ln -fs '${pair.reverse}' ${safename_pair}_2.fastq &&
        #end for
    #elif $inputs.input_type == 'sequence':
        #for $input in $inputs.input_sequence
            #set $safename_seq = re.sub('[^\w\-_\.]', '_', $input.element_identifier)
            ln -fs '$input' ${safename_seq}.fasta &&
        #end for
    #end if

    ## ------------------------------------------------------------------
    ## 2. Run fARGene.
    ##    Paired-end reads (single pair or collection) are analysed in
    ##    metagenomic mode; contigs/genomes are analysed as whole sequences.
    ## ------------------------------------------------------------------
    #if $inputs.input_type in ('paired', 'collection'):
        fargene --infiles *.fastq --meta
    #elif $inputs.input_type == 'sequence':
        fargene --infiles *.fasta
    #end if
    --hmm-model '$models'
    --output fargene_output
    --tmp-dir tmp
    -p \${GALAXY_SLOTS:-4}
    ## The numeric options are only passed when changed from the form default.
    #if $meta_score != 0.0:
        --meta-score '$meta_score'
    #end if
    #if $score != 0.0:
        --score '$score'
    #end if
    #if $min_orf_length != 90:
        --min-orf-length '$min_orf_length'
    #end if
    ## Boolean parameters render as their flag when checked and as an
    ## empty string otherwise (see truevalue/falsevalue on each param).
    $protein
    $retrieve_whole
    $no_orf_predict
    $no_quality_filtering
    $no_assembly
    $orf_finder
    $store_peptides

    ## ------------------------------------------------------------------
    ## 3. For read input, bundle the retrieved fragments into one archive.
    ## ------------------------------------------------------------------
    #if $inputs.input_type in ('paired', 'collection'):
        && tar czf retrievedFragments.tar.gz fargene_output/retrievedFragments
    #end if
    ]]></command>
        <!-- Input selector: one read pair, a list of read pairs, or one or
             more FASTA files of contigs/genomes. The command template
             branches on inputs.input_type. -->
        <conditional name="inputs">
            <param name="input_type" type="select" label="Input type" help="Select 'paired end' reads or 'sequence' for genomes/contigs">
                <option value="paired" selected="true">Paired</option>
                <option value="collection">Paired Collection</option>
                <option value="sequence">Contigs/Genomes</option>
            </param>
            <!-- NOTE(review): this branch accepts fastqsanger.gz while the
                 collection branch accepts only fastqsanger, and the command
                 links every read file as *.fastq. Confirm that fARGene reads
                 gzip-compressed input through such a link. -->
            <when value="paired">
                <param name="R1" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads (R1)" help="The file of forward reads in FASTQ format"/>
                <param name="R2" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads (R2)" help="The file of reverse reads in FASTQ format"/>
            </when>
            <when value="collection">
                <param name="input_collection" format="fastqsanger" type="data_collection" collection_type="list:paired" label="Paired collection"/>
            </when>
            <when value="sequence">
                <param name="input_sequence" type="data" format="fasta" multiple="true" label="Input contigs/genomes" />
            </when>
        </conditional>

        <!-- Resistance gene model; the selected value is handed to fARGene
             as the HMM model name. -->
        <param name="models" type="select" label="Resistance Genes">
            <option value="class_a">Class A beta-lactamases</option>
            <option value="class_b_1_2">Subclass B1 and B2 beta-lactamases</option>
            <option value="class_b_3">Subclass B3 beta-lactamases</option>
            <option value="class_c">Class C beta-lactamases</option>
            <option value="class_d_1">Class D beta-lactamases-1</option>
            <option value="class_d_2">Class D beta-lactamases-2</option>
            <option value="qnr">QNR</option>
        </param>
        <!-- Score thresholds. The command template passes these on only
             when they differ from 0.0. -->
        <param name="score" argument="--score" type="float" value="0.0" label="The threshold score for a sequence to be classified as a (almost) complete gene" />
        <param name="meta_score" argument="--meta-score" type="float" value="0.0" label="The threshold score for a fragment to be classified as a positive. Expressed as score per amino acid" />
        <!-- The previous label ("Rescue short unassmebled plasmids") did not
             describe this option; upstream help text describes it as marking
             amino-acid input. -->
        <param name="protein" argument="--protein" type="boolean" truevalue="--protein" falsevalue="" checked="false" label="The input sequence(s) are amino acids" />
        <!-- Passed on only when it differs from 90. -->
        <param name="min_orf_length" argument="--min-orf-length" type="integer" min="1" value="90" label="The minimal length for a retrieved predicted ORF (nt)" />
        <!-- Boolean switches: each renders as its command line flag when
             checked and as an empty string otherwise. Labels are kept on a
             single line because line breaks inside an attribute value are
             turned into runs of spaces by the XML parser. -->
        <param name="retrieve_whole" argument="--retrieve-whole" type="boolean" truevalue="--retrieve-whole" falsevalue="" checked="false" label="Use this flag if the whole sequence where a hit is detected should be retrieved" />
        <param name="no_orf_predict" argument="--no-orf-predict" type="boolean" truevalue="--no-orf-predict" falsevalue="" checked="false" label="Do not perform ORF prediction" />
        <param name="no_quality_filtering" argument="--no-quality-filtering" type="boolean" truevalue="--no-quality-filtering" falsevalue="" checked="false" label="Use if no quality control should be performed on the metagenomic data" />
        <param name="no_assembly" argument="--no-assembly" type="boolean" truevalue="--no-assembly" falsevalue="" checked="false" label="Use if you want to skip the assembly and retrieval of contigs for metagenomic data" />
        <param name="orf_finder" argument="--orf-finder" type="boolean" truevalue="--orf-finder" falsevalue="" checked="false" label="Use NCBI ORFfinder instead of prodigal for ORF prediction of genomes/contigs" />
        <param name="store_peptides" argument="--store-peptides" type="boolean" truevalue="--store-peptides" falsevalue="" checked="false" label="Store the translated sequences. Useful if you plan to redo the analysis using a different model and want to skip the preprocessing steps" />

        <!-- Text summary written by fARGene into its output directory. -->
        <data name="summary" format="txt" from_work_dir="fargene_output/results_summary.txt" label="${tool.name} on ${on_string} (Summary)" />
        <!-- Archive built by the command template; it only exists for read
             input, hence the filter. -->
        <data name="retrievedFragments" format="tar.gz" from_work_dir="retrievedFragments.tar.gz" label="${tool.name} on ${on_string} (Retrieved Fragments)">
            <filter>inputs["input_type"] in ['paired' , 'collection']</filter>
        </data>
        <data name="fargene_log" format="txt" from_work_dir="fargene_analysis.log" label="${tool.name} on ${on_string} (log)" />
        <!-- Every file found in the directory becomes one list element named
             after the file. Only "format" is given: "ext" is an alias for
             it, and the earlier ext="out" contradicted format="txt". -->
        <collection name="hmmsearchresults" type="list" label="HMM Search Result">
            <discover_datasets pattern="__name__" directory="fargene_output/hmmsearchresults" format="txt" visible="false" />
        </collection>
        <collection name="predictedGenes" type="list" label="Predicted Genes">
            <discover_datasets pattern="__name__" directory="fargene_output/predictedGenes" format="fasta" visible="false" />
        </collection>
        <!-- Single read pair: expects summary, retrieved fragments, log and
             the two discovered collections (5 outputs). -->
        <test expect_num_outputs="5">
            <conditional name="inputs">
                <param name="input_type" value="paired"/>
                <param name="R1" value="reads_1.fastq" ftype="fastqsanger"/>
                <param name="R2" value="reads_2.fastq" ftype="fastqsanger"/>
            </conditional>
            <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
        </test>
        <!-- Same reads supplied as a one-element list:paired collection;
             the summary is compared against the paired-mode expectation. -->
        <test expect_num_outputs="5">
            <conditional name="inputs">
                <param name="input_type" value="collection"/>
                <param name="input_collection">
                    <collection type="list:paired">
                        <element name="Pair1">
                            <collection type="paired">
                                <element name="forward" value="reads_1.fastq" ftype="fastqsanger"/>
                                <element name="reverse" value="reads_2.fastq" ftype="fastqsanger"/>
                            </collection>
                        </element>
                    </collection>
                </param>
            </conditional>
            <output name="summary" file="paired/results_summary.txt" compare="sim_size"/>
        </test>
        <!-- Contig/genome input: the retrieved-fragments archive is filtered
             out for this input type, leaving 4 outputs. -->
        <test expect_num_outputs="4">
            <conditional name="inputs">
                <param name="input_type" value="sequence"/>
                <param name="input_sequence" value="klebsiella_plasmid.fasta"/>
            </conditional>
            <!-- "models" is a top-level parameter, so it is set outside the
                 "inputs" conditional. -->
            <param name="models" value="class_b_1_2" />
            <output name="summary" file="contigs/results_summary.txt" compare="sim_size"/>
        </test>

    fARGene (Fragmented Antibiotic Resistance Gene Identifier) is a tool that takes either fragmented metagenomic data or longer sequences as input and predicts and delivers full-length antibiotic resistance genes as output. The tool includes developed and optimized models for a number of resistance gene types, and the functionality to create and optimize models of your own choice of resistance genes.

    The current version of the tool includes developed and optimized models for identification of the following resistance genes:

    - Class A beta-lactamases
    - Subclass B1 and B2 beta-lactamases
    - Subclass B3 beta-lactamases
    - Class C beta-lactamases
    - Class D beta-lactamases
    - qnr
    <expand macro="citations" />