view genenotebook_build.xml @ 3:1d59e907fa98 draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/genenotebook commit 583e362682bf461f9701f7dc4986a428a67ca47a
author gga
date Wed, 20 Sep 2023 16:48:06 +0000
parents 5a6050937cb9
children ace14a8ac6e4
line wrap: on
line source

<?xml version="1.0"?>
<tool id="genenotebook_build" name="Build a GeneNoteBook" version="@WRAPPER_VERSION@" profile="21.05">
    <description></description>
    <!-- Shared definitions are imported from macros.xml (not visible in this file).
         NOTE(review): the @WRAPPER_VERSION@, @START_GNB@, @CONNECT_INFO@ and @ZIP_GNB@
         tokens and the "requirements", "sanitized" and "citation" macros expanded in
         this wrapper are presumably defined there; confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
## Command template: for every genome of the "genomes" repeat, load the genome
## sequence, then each of its annotations together with the optional
## InterProScan / EggNOG / Blast-or-Diamond results and expression samples.
## @START_GNB@, @CONNECT_INFO@ and @ZIP_GNB@ are macro tokens expanded by Galaxy
## (NOTE(review): presumably defined in macros.xml, confirm there).
@START_GNB@

#for genome in $genomes:
    genoboo add genome @CONNECT_INFO@ --name '${genome.name}' ${genome.public} '${genome.genome}';

    #for annot in $genome.annots:
        genoboo add annotation @CONNECT_INFO@ --name '${genome.name}'
        #if $annot.prot_naming.method == 'regex'
            #if $annot.prot_naming.re_protein:
            --re_protein '$annot.prot_naming.re_protein'
            #end if
            #if $annot.prot_naming.re_protein_capture:
            --re_protein_capture '$annot.prot_naming.re_protein_capture'
            #end if
        #elif $annot.prot_naming.method == "attr"
            #if $annot.prot_naming.attr_protein:
            --attr_protein '$annot.prot_naming.attr_protein'
            #end if
        #end if
        '${annot.annotation}';
        ## The InterProScan input accepts tsv or gff3: pass the matching format name
        #if $annot.interproscan:
            genoboo add interproscan @CONNECT_INFO@ --format
            #if $annot.interproscan.is_of_type('gff3'):
                gff3
            #else:
                tsv
            #end if
            '${annot.interproscan}';
        #end if

        #if $annot.eggnog:
            genoboo add eggnog @CONNECT_INFO@ '${annot.eggnog}';
        #end if

        ## Blast and Diamond results are both loaded from an XML file
        #if $annot.blast_cond.blast_choice == "blast":
            genoboo add blast @CONNECT_INFO@
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.blast}';
        #elif $annot.blast_cond.blast_choice == "diamond":
            genoboo add diamond @CONNECT_INFO@
                --format 'xml'
                --algorithm '${annot.blast_cond.algorithm}'
                --matrix '${annot.blast_cond.matrix}'
                --database '${annot.blast_cond.database}'
                '${annot.blast_cond.diamond}';
        #end if

        ## One "add transcriptome" call per entry of the "expression" repeat
        #for exp in $annot.expression:
            genoboo add transcriptome @CONNECT_INFO@
                --sample-name '${exp.sample_name}'
                --replica-group '${exp.replica_group}'
                --sample-description '${exp.sample_description}'
                '${exp.counts}';
        #end for
    #end for
#end for

@ZIP_GNB@
    ]]></command>
    <inputs>
        <!-- Optional existing database to extend; when empty a new database is created (see help text) -->
        <param name="existing" type="data" format="genenotebook" optional="true" label="Load data in an existing GeneNoteBook database" help="Leave empty to start a new database from scratch" />
        <!-- One entry per genome; each genome can carry any number of annotations -->
        <repeat name="genomes" title="Genomes">
            <param argument="--name" label="Name" type="text" help="Reference genome name" />
            <param name="genome" label="Genome sequence" type="data" format="fasta" />
            <param name="public" label="Public access" type="boolean" truevalue="--public" falsevalue="" value="false" />
            <repeat name="annots" title="Annotations">
                <param name="annotation" label="Annotation" type="data" format="gff3" />
                <!-- How protein names are derived: not at all, by regex on the mRNA name, or from a GFF attribute -->
                <conditional name="prot_naming">
                    <param name="method" type="select" label="Protein naming method">
                        <option value="none">No specific names for proteins</option>
                        <option value="regex">Based on mRNA name with regular expression</option>
                        <option value="attr">From GFF attribute</option>
                    </param>
                    <when value="none" />
                    <when value="regex">
                        <param argument="--re_protein_capture" label="Regex protein capture" type="text" help="Regular expression to capture groups in mRNA name to use in 'Regex protein'" value="^(.*?)-R([A-Z]+)$">
                            <expand macro="sanitized"/>
                        </param>
                        <param argument="--re_protein" label="Regex protein" type="text" help="Replacement string for the protein name using capturing groups defined in 'Regex protein capture'" value="$1-P$2">
                            <expand macro="sanitized"/>
                        </param>
                    </when>
                    <when value="attr">
                        <param argument="--attr_protein" label="Protein id attribute" type="text" help="Attribute containing the protein uniquename. It is searched at the mRNA level, and if not found at CDS level." value="protein_id"/>
                    </when>
                </conditional>
                <param name="interproscan" label="InterProScan results" optional="true" type="data" format="tsv,gff3" />
                <param name="eggnog" label="EggNOG-Mapper results" optional="true" type="data" format="tsv" />

                <!-- Similarity search results: none, Blast or Diamond (both supplied as XML) -->
                <conditional name="blast_cond">
                    <param name="blast_choice" type="select" label="Import Blast or Diamond results">
                        <option value="no" selected="true">No</option>
                        <option value="blast">Blast</option>
                        <option value="diamond">Diamond</option>
                    </param>
                    <when value="no" />
                    <when value="blast">
                        <param name="blast" label="Blast results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                    <when value="diamond">
                        <param name="diamond" label="Diamond results (xml)" optional="false" type="data" format="xml" />
                        <param argument="--algorithm" label="Algorithm" optional="false" type="text" help="e.g. blastx, blastp" />
                        <param argument="--matrix" label="Matrix" optional="false" type="text" help="e.g. BLOSUM90, BLOSUM80, PAM100" />
                        <param argument="--database" label="database" optional="false" type="text" help="e.g. Non-redundant protein sequences (nr)" />
                    </when>
                </conditional>

                <!-- One entry per expression sample attached to this annotation -->
                <repeat name="expression" title="Expression data">
                    <param name="counts" label="Expression data" optional="true" type="data" format="tsv" />
                    <param argument="--sample-name" name="sample_name" label="Unique sample name" type="text" />
                    <param argument="--replica-group" name="replica_group" label="Identifier to group samples that belong to the same experiment" type="text" />
                    <param argument="--sample-description" name="sample_description" label="Description of the experiment" type="text" />
                </repeat>
            </repeat>
        </repeat>
    </inputs>
    <outputs>
        <!-- Single output dataset "gnb_db", declared with the genenotebook datatype -->
        <data name="gnb_db" format="genenotebook" label="GeneNoteBook on ${on_string}" />
    </outputs>
    <tests>
        <!-- Minimal case: one genome, no annotation, "public" left at its default -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Same genome loaded with the "public" boolean enabled -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org" />
                <param name="public" value="true" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- One genome plus one GFF annotation -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
            </assert_stdout>
        </test>
        <!-- Genome, annotation and InterProScan results supplied as tsv -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
            </assert_stdout>
        </test>
        <!-- Adds a genome on top of an existing database archive passed as "existing" -->
        <test>
            <param name="existing" value="output/genome.tar.bz2" />
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
            </repeat>
            <output name="gnb_db" file="output/genome_reload.tar.bz2" compare="sim_size" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Genome, annotation, InterProScan and EggNOG-Mapper results -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />
                    <param name="interproscan" value="interproscan.tsv" ftype="tsv" />
                    <param name="eggnog" value="eggnog.tsv" ftype="tsv" />
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addInterproscan succesfully inserted 3 elements" />
                <has_text text="addEggnog succesfully inserted 4 elements" />
            </assert_stdout>
        </test>
        <!-- Regex protein naming, Blast XML results and one expression sample -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="blast"/>
                        <param name="blast" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM80" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <param name="sample_name" value="Sample1_rep1" />
                        <param name="replica_group" value="Sample1" />
                        <param name="sample_description" value="Pointless experiment" />
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addTranscriptome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
        <!-- Same scenario through the Diamond branch; it reuses the blast.xml fixture and the same expected archive -->
        <test>
            <repeat name="genomes">
                <param name="name" value="Test org 2" />
                <param name="genome" value="genome.fa" />
                <repeat name="annots">
                    <param name="annotation" value="annot.gff" />

                    <conditional name="prot_naming">
                        <param name="method" value="regex"/>
                        <param name="re_protein_capture" value="^(.*?)-T([0-9]+)$" />
                        <param name="re_protein" value="$1-P$2" />
                    </conditional>

                    <conditional name="blast_cond">
                        <param name="blast_choice" value="diamond"/>
                        <param name="diamond" value="blast.xml" ftype="xml" />
                        <param name="algorithm" value="blastx" />
                        <param name="matrix" value="BLOSUM62" />
                        <param name="database" value="Non-reundant garbage (nrg)" />
                    </conditional>

                    <repeat name="expression">
                        <param name="counts" value="exp.tsv" ftype="tsv" />
                        <param name="sample_name" value="Sample1_rep1" />
                        <param name="replica_group" value="Sample1" />
                        <param name="sample_description" value="Pointless experiment" />
                    </repeat>
                </repeat>
            </repeat>
            <output name="gnb_db" file="output/genome_annot_ips_en_bl_exp.tar.bz2" compare="sim_size" delta="15000" />
            <assert_stdout>
                <has_text text="addGenome succesfully inserted 1 elements" />
                <has_text text="addAnnotation succesfully inserted 5 elements" />
                <has_text text="addSimilarSequence succesfully inserted 1 elements" />
                <has_text text="addTranscriptome succesfully inserted 1 elements" />
            </assert_stdout>
        </test>
    </tests>
    <help><![CDATA[
Build a GeneNoteBook by loading data into a MongoDB database. The resulting dataset can then be displayed with the "GeneNoteBook viewer" interactive tool.

The resulting GeneNoteBook will contain the default user accounts created on the first launch. You are responsible for changing them if you ever want to put your result online.

The current version is based on a fork of GeneNoteBook that includes various improvements (https://github.com/gogepp/genoboo/).
    ]]></help>
    <expand macro="citation" />
</tool>