view genenotebook_build.xml @ 8:29eeddd8ea62 draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/genenotebook commit 1429f00733ee24bf2edf56b9fd7ff4ad2dec3f15
author gga
date Tue, 20 Feb 2024 10:05:05 +0000
parents bd94e502872a
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="genenotebook_build" name="Build a GeneNoteBook" version="@WRAPPER_VERSION@" profile="21.05">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Command template (Cheetah): loads every genome, then each of its annotations
         together with the optional result files attached to that annotation.
         The START_GNB, CONNECT_INFO and ZIP_GNB tokens are not defined in this file;
         presumably they come from macros.xml (to be confirmed there). -->
    <command detect_errors="exit_code"><![CDATA[
@START_GNB@

#for genome in $genomes:
    ## Load the genome sequence; the annotation step below refers to it by name.
    genoboo add genome @CONNECT_INFO@ --name '${genome.name}' ${genome.public} '${genome.genome}';

    #for annot in $genome.annots:
        genoboo add annotation @CONNECT_INFO@ --name '${genome.name}' --annot '${annot.annot}'
        #if $annot.prot_naming.method == 'regex'
            #if $annot.prot_naming.re_protein:
            --re_protein '$annot.prot_naming.re_protein'
            #end if
            #if $annot.prot_naming.re_protein_capture:
            --re_protein_capture '$annot.prot_naming.re_protein_capture'
            #end if
        #elif $annot.prot_naming.method == "attr"
            ## The only parameter declared in the "attr" branch is named attr_protein.
            #if $annot.prot_naming.attr_protein:
            --attr_protein '$annot.prot_naming.attr_protein'
            #end if
        #end if
        '${annot.annotation}';

        ## Optional InterProScan results: the format flag follows the dataset type.
        #if $annot.interproscan:
            genoboo add interproscan @CONNECT_INFO@ --format
            #if $annot.interproscan.is_of_type('gff3'):
                gff3
            #else:
                tsv
            #end if
            --annot '${annot.annot}'
            '${annot.interproscan}';
        #end if

        #if $annot.eggnog:
            genoboo add eggnog @CONNECT_INFO@ --annot '${annot.annot}' '${annot.eggnog}';
        #end if

        #if $annot.hectar:
            genoboo add hectar @CONNECT_INFO@ --annot '${annot.annot}' '${annot.hectar}';
        #end if

        ## Blast and Diamond are mutually exclusive; both are passed as XML.
        #if $annot.blast_cond.blast_choice == "blast":
            genoboo add blast @CONNECT_INFO@
                --annot '${annot.annot}'
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.blast}';
        #elif $annot.blast_cond.blast_choice == "diamond":
            genoboo add diamond @CONNECT_INFO@
                --annot '${annot.annot}'
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.diamond}';
        #end if

        ## One call per expression file; each replica group adds a pair of options.
        #for exp in $annot.expression:
                genoboo add expression @CONNECT_INFO@
                    --annot '${annot.annot}'
                    --sample-description ''
                    ${exp.public}
                    #for replica in $exp.expression_replicas:
                        --replicas '${replica.replicas}'
                        --replica-names '${replica.replica_names}'
                    #end for
                    --
                    '${exp.counts}';
        #end for
    #end for
#end for

@ZIP_GNB@
    ]]></command>
    <inputs>
        <!-- Optional existing database to extend; when left empty a new one is started. -->
        <param name="existing" type="data" format="genenotebook" optional="true" label="Load data in an existing GeneNoteBook database" help="Leave empty to start a new database from scratch" />
        <!-- One entry per reference genome; annotations are nested inside their genome. -->
        <repeat name="genomes" title="Genomes">
            <param argument="--name" label="Name" type="text" help="Reference genome name" />
            <param name="genome" label="Genome sequence" type="data" format="fasta" />
            <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
            <repeat name="annots" title="Annotations">
                <param name="annotation" label="Annotation" type="data" format="gff3" />
                <param argument="--annot" label="Annotation name" type="text" help="Annotation name" />
                <!-- How protein names are obtained for this annotation. -->
                <conditional name="prot_naming">
                    <param name="method" type="select" label="Protein naming method">
                        <option value="none">No specific names for proteins</option>
                        <option value="regex">Based on mRNA name with regular expression</option>
                        <option value="attr">From GFF attribute</option>
                    </param>
                    <when value="none" />
                    <when value="regex">
                        <param argument="--re_protein_capture" label="Regex protein capture" type="text" help="Regular expression to capture groups in mRNA name to use in 'Regex protein'" value="^(.*?)-R([A-Z]+)$">
                            <expand macro="sanitized"/>
                        </param>
                        <param argument="--re_protein" label="Regex protein" type="text" help="Replacement string for the protein name using capturing groups defined in 'Regex protein capture'" value="$1-P$2">
                            <expand macro="sanitized"/>
                        </param>
                    </when>
                    <when value="attr">
                        <param argument="--attr_protein" label="Protein id attribute" type="text" help="Attribute containing the protein uniquename. It is searched at the mRNA level, and if not found at CDS level." value="protein_id"/>
                    </when>
                </conditional>
                <!-- Optional functional annotation results attached to this annotation. -->
                <param name="interproscan" label="InterProScan results" optional="true" type="data" format="tsv,gff3" />
                <param name="eggnog" label="EggNOG-Mapper results" optional="true" type="data" format="tsv" />
                <param name="hectar" label="Hectar results" optional="true" type="data" format="tsv" />

                <!-- Similarity search results: none, Blast or Diamond (XML in both cases). -->
                <conditional name="blast_cond">
                    <param name="blast_choice" type="select" label="Import Blast or Diamond results">
                        <option value="no" selected="true">No</option>
                        <option value="blast">Blast</option>
                        <option value="diamond">Diamond</option>
                    </param>
                    <when value="no" />
                    <when value="blast">
                        <param name="blast" label="Blast results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="Database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                    <when value="diamond">
                        <param name="diamond" label="Diamond results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="Database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                </conditional>

                <!-- Expression tables; each one may declare any number of replica groups. -->
                <repeat name="expression" title="Expression data">
                    <param name="counts" label="Expression data tabular file" help="One line per mRNA; the first column is the mRNA ID and every other column is an experimental condition. TPM values preferably." optional="false" type="data" format="tsv" />
                    <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
                    <repeat name="expression_replicas" title="Expression replicas">
                        <param argument="--replica-names" label="Name of a replica group" type="text"/>
                        <param argument="--replicas" label="Comma-separated list of columns of replicas (e.g. 1,2,3)" type="text" help="Comma-separated list of column positions, which are part of the same replica group. Columns not mentioned in any replica group will be considered to be a single replica experiment." />
                    </repeat>
                </repeat>
            </repeat>
        </repeat>
    </inputs>
    <outputs>
        <!-- Single output dataset, of format "genenotebook". -->
        <data name="gnb_db" format="genenotebook" label="GeneNoteBook on ${on_string}" />
    </outputs>
    <tests>
        <!-- Outputs are compared by size only (sim_size); the has_text strings,
             including the "succesfully" spelling, are matched against the tool's stdout. -->
        <!-- Test 1: a single genome, public access left at its default. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 2: same genome with public access enabled. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="public" value="true" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 3: genome plus one annotation, default protein naming. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="annot" value="My annotation" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
            </assert_stdout>
        </test>
        <!-- Test 4: genome, annotation and InterProScan results in TSV format. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="annot" value="My annotation" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
            </assert_stdout>
        </test>
        <!-- Test 5: a genome added on top of an existing database archive. -->
        <test>
            <param name="existing" value="output/genome.tar.bz2" />
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome_reload.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 6: InterProScan, EggNOG and Hectar results together.
             Note: no stdout assertion covers the Hectar step. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="annot" value="My annotation" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                    <param name="eggnog" value="eggnog.tsv" ftype="tsv" />
                    <param name="hectar" value="hectar.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
                <has_text text="addEggnog succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
        <!-- Test 7: regex protein naming, Blast XML results and a public expression
             table without replica groups. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="annot" value="My annotation" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="blast"/>
                        <param name="blast" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM80" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <param name="public" value="true" />
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addExpression succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
        <!-- Test 8: Diamond branch with two replica groups; the expected archive is
             the same file as in the Blast test above. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="annot" value="My annotation" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="diamond"/>
                        <param name="diamond" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM62" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <repeat name="expression_replicas">
                            <param name="replicas" value="1,2" />
                            <param name="replica_names" value="ReplicaName1" />
                        </repeat>
                        <repeat name="expression_replicas">
                            <param name="replicas" value="3,4" />
                            <param name="replica_names" value="ReplicaName2" />
                        </repeat>
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addExpression succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[
Build a GeneNoteBook by loading data into a MongoDB database. The resulting dataset can then be displayed with the "GeneNoteBook viewer" interactive tool.

The resulting GeneNoteBook will contain the default user accounts created on the first launch. You are responsible for changing them if you ever want to put your result online.

The current version is based on a fork of GeneNoteBook that includes various improvements (https://github.com/gogepp/genoboo/).
    ]]></help>
    <expand macro="citation" />
</tool>