view genenotebook_build.xml @ 4:ace14a8ac6e4 draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/genenotebook commit 81226a2f347b2dc273546311e7269c07f0b0f485
author gga
date Tue, 10 Oct 2023 15:29:02 +0000
parents 1d59e907fa98
children bd94e502872a
line wrap: on
line source

<?xml version="1.0"?>
<tool id="genenotebook_build" name="Build a GeneNoteBook" version="@WRAPPER_VERSION@" profile="21.05">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
## Cheetah template for the job script. For every genome it registers the
## sequence, then each annotation and the optional result files attached to it.
## START_GNB, CONNECT_INFO and ZIP_GNB are tokens expanded from macros.xml
## (not visible in this file).
@START_GNB@

#for genome in $genomes:
    genoboo add genome @CONNECT_INFO@ --name '${genome.name}' ${genome.public} '${genome.genome}';

    #for annot in $genome.annots:
        genoboo add annotation @CONNECT_INFO@ --name '${genome.name}'
        #if $annot.prot_naming.method == 'regex'
            #if $annot.prot_naming.re_protein:
            --re_protein '$annot.prot_naming.re_protein'
            #end if
            #if $annot.prot_naming.re_protein_capture:
            --re_protein_capture '$annot.prot_naming.re_protein_capture'
            #end if
        #elif $annot.prot_naming.method == "attr"
            ## The only parameter of the "attr" branch is attr_protein (its name is
            ## derived from its argument), so that is the value that must be tested.
            #if $annot.prot_naming.attr_protein:
            --attr_protein '$annot.prot_naming.attr_protein'
            #end if
        #end if
        '${annot.annotation}';
        ## InterProScan results are optional; the format flag follows the dataset type.
        #if $annot.interproscan:
            genoboo add interproscan @CONNECT_INFO@ --format
            #if $annot.interproscan.is_of_type('gff3'):
                gff3
            #else:
                tsv
            #end if
            '${annot.interproscan}';
        #end if

        #if $annot.eggnog:
            genoboo add eggnog @CONNECT_INFO@ '${annot.eggnog}';
        #end if

        #if $annot.hectar:
            genoboo add hectar @CONNECT_INFO@ '${annot.hectar}';
        #end if

        ## Blast and Diamond share the same options; only the subcommand and the
        ## input dataset differ. Nothing is emitted when the choice is "no".
        #if $annot.blast_cond.blast_choice == "blast":
            genoboo add blast @CONNECT_INFO@
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.blast}';
        #elif $annot.blast_cond.blast_choice == "diamond":
            genoboo add diamond @CONNECT_INFO@
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.diamond}';
        #end if

        ## One call per expression table; each replica group adds one pair of
        ## replicas / replica-names options. The bare "--" presumably ends option
        ## parsing so the counts file is read as a positional argument (TODO confirm).
        #for exp in $annot.expression:
                genoboo add expression @CONNECT_INFO@
                    --sample-description ''
                    ${exp.public}
                    #for replica in $exp.expression_replicas:
                        --replicas '${replica.replicas}'
                        --replica-names '${replica.replica_names}'
                    #end for
                    --
                    '${exp.counts}';
        #end for
    #end for
#end for

@ZIP_GNB@
    ]]></command>
    <inputs>
        <!-- Optional existing database; when left empty a new database is started from scratch. -->
        <param name="existing" type="data" format="genenotebook" optional="true" label="Load data in an existing GeneNoteBook database" help="Leave empty to start a new database from scratch" />
        <!-- One entry per reference genome; annotations are nested inside their genome. -->
        <repeat name="genomes" title="Genomes">
            <param argument="--name" label="Name" type="text" help="Reference genome name" />
            <param name="genome" label="Genome sequence" type="data" format="fasta" />
            <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
            <!-- One entry per GFF3 annotation, together with the result files that refer to it. -->
            <repeat name="annots" title="Annotations">
                <param name="annotation" label="Annotation" type="data" format="gff3" />
                <!-- How protein names are obtained: not at all, by regex on the mRNA name, or from a GFF attribute. -->
                <conditional name="prot_naming">
                    <param name="method" type="select" label="Protein naming method">
                        <option value="none">No specific names for proteins</option>
                        <option value="regex">Based on mRNA name with regular expression</option>
                        <option value="attr">From GFF attribute</option>
                    </param>
                    <when value="none" />
                    <when value="regex">
                        <param argument="--re_protein_capture" label="Regex protein capture" type="text" help="Regular expression to capture groups in mRNA name to use in 'Regex protein'" value="^(.*?)-R([A-Z]+)$">
                            <expand macro="sanitized"/>
                        </param>
                        <param argument="--re_protein" label="Regex protein" type="text" help="Replacement string for the protein name using capturing groups defined in 'Regex protein capture'" value="$1-P$2">
                            <expand macro="sanitized"/>
                        </param>
                    </when>
                    <when value="attr">
                        <param argument="--attr_protein" label="Protein id attribute" type="text" help="Attribute containing the protein uniquename. It is searched at the mRNA level, and if not found at CDS level." value="protein_id"/>
                    </when>
                </conditional>
                <!-- Optional functional annotation results; each is loaded only when a dataset is supplied. -->
                <param name="interproscan" label="InterProScan results" optional="true" type="data" format="tsv,gff3" />
                <param name="eggnog" label="EggNOG-Mapper results" optional="true" type="data" format="tsv" />
                <param name="hectar" label="Hectar results" optional="true" type="data" format="tsv" />

                <!-- Similarity search results: none, Blast or Diamond. Both branches expose the same three text options. -->
                <conditional name="blast_cond">
                    <param name="blast_choice" type="select" label="Import Blast or Diamond results">
                        <option value="no" selected="true">No</option>
                        <option value="blast">Blast</option>
                        <option value="diamond">Diamond</option>
                    </param>
                    <when value="no" />
                    <when value="blast">
                        <param name="blast" label="Blast results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                    <when value="diamond">
                        <param name="diamond" label="Diamond results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                </conditional>

                <!-- Expression tables; replica groups are optional and list the columns that belong together. -->
                <repeat name="expression" title="Expression data">
                    <param name="counts" label="Expression data tabular file" help="One line per mRNA, first column is mRNA ID, each other columns is an experimental condition. TPM values preferably." optional="false" type="data" format="tsv" />
                    <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
                    <repeat name="expression_replicas" title="Expression replicas">
                        <param argument="--replica-names" label="Name of a replica group" type="text"/>
                        <param argument="--replicas" label="Comma-separated list of columns of replicas (e.g. 1,2,3)" type="text" help="Comma-separated list of column positions, which are part of the same replica group. Columns not mentioned in any replica group will be considered to be a single replica experiment." />
                    </repeat>
                </repeat>
            </repeat>
        </repeat>
    </inputs>
    <outputs>
        <!-- Single output dataset of the genenotebook datatype. -->
        <data name="gnb_db" format="genenotebook" label="GeneNoteBook on ${on_string}" />
    </outputs>
    <tests>
        <!-- NOTE(review): the spelling "succesfully" in the expected stdout is presumably what the tool prints; verify before correcting it. -->
        <!-- Test 1: a single genome with default (non-public) access. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 2: same genome with the public flag enabled; compared against the same archive as test 1. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="public" value="true" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 3: genome plus one GFF annotation, default protein naming. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
            </assert_stdout>
        </test>
        <!-- Test 4: genome, annotation and InterProScan results in TSV format. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
            </assert_stdout>
        </test>
        <!-- Test 5: adds a genome on top of an existing database archive. -->
        <test>
            <param name="existing" value="output/genome.tar.bz2" />
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome_reload.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Test 6: annotation with InterProScan, EggNOG and Hectar results. The Hectar load has no stdout assertion here. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                    <param name="eggnog" value="eggnog.tsv" ftype="tsv" />
                    <param name="hectar" value="hectar.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
                <has_text text="addEggnog succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
        <!-- Test 7: regex protein naming, Blast XML results and a public expression table without replica groups. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="blast"/>
                        <param name="blast" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM80" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <param name="public" value="true" />
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addExpression succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
        <!-- Test 8: like test 7 but through the Diamond branch, with two replica groups on the expression table. -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="diamond"/>
                        <param name="diamond" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM62" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <repeat name="expression_replicas">
                            <param name="replicas" value="1,2" />
                            <param name="replica_names" value="ReplicaName1" />
                        </repeat>
                        <repeat name="expression_replicas">
                            <param name="replicas" value="3,4" />
                            <param name="replica_names" value="ReplicaName2" />
                        </repeat>
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addExpression succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[
Build a GeneNoteBook by loading data into a MongoDB database. The resulting dataset can then be displayed with the "GeneNoteBook viewer" interactive tool.

The resulting GeneNoteBook will contain the default user accounts created on the first launch. You are responsible for changing them if you ever want to put your result online.

The current version is based on a fork of GeneNoteBook that includes various improvements (https://github.com/gogepp/genoboo/).
    ]]></help>
    <expand macro="citation" />
</tool>