view orthofinder_only_groups.xml @ 9:b3d25fae5389 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/orthofinder commit 179e3fd2aeb66484e49e6370e9507e24d118f4ea
author iuc
date Mon, 15 Jan 2024 10:18:14 +0000
parents 3b974afde673
children
line wrap: on
line source

<tool name="OrthoFinder" id="orthofinder_onlygroups" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>finds orthogroups in a set of proteomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">OrthoFinder</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">orthofinder</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #import re
        ## Prepare inputs: symlink every dataset into the job working directory.
        ## Names derived from element identifiers keep word characters, hyphens,
        ## dots and whitespace (anything else becomes an underscore), so every
        ## link name has to stay single-quoted for the shell.
        #if $init.start == "fasta":
            #for $input in $init.input_fasta
                #set $identifier=re.sub('[^\w\-\s\.]', '_', str($input.element_identifier))
                ln -s '$input' '${identifier}.fasta' &&
            #end for
        #elif $init.start == "blast":
            #for $input in $init.input_blast_out
                #set $identifier=re.sub('[^\w\-\s\.]', '_', str($input.element_identifier))
                ## The blast collection emitted by this tool loses its file extension
                ## in the element identifier; put it back so OrthoFinder recognises
                ## the file. Names that already carry it (plain or gzipped) are kept.
                #if not ($identifier.endswith('.txt') or $identifier.endswith('.txt.gz')):
                    #set $identifier = $identifier + '.txt'
                #end if
                ln -s '$input' '$identifier' &&
            #end for

            #for $input in $init.input_blast_fa
                #set $identifier=re.sub('[^\w\-\s\.]', '_', str($input.element_identifier))
                ## Same extension repair for the SpeciesX.fa files.
                #if not $identifier.endswith('.fa'):
                    #set $identifier = $identifier + '.fa'
                #end if
                ln -s '$input' '$identifier' &&
            #end for

            ## OrthoFinder reads both ID tables under fixed file names, so the links
            ## must not depend on how the datasets happen to be named in the history.
            ln -s '$init.specIDs' 'SpeciesIDs.txt' &&
            ln -s '$init.seqIDs' 'SequenceIDs.txt' &&
        #end if

        ## start Orthofinder
        orthofinder
        #if $init.start == "fasta":
            -f .
            -S $init.search.search_program
            $init.input_type
        #elif $init.start == "blast":
            -b .
        #end if

        -I $inflation

        #if $trees.run_mode == "full":
            -M '${trees.tree_method.method}'
            #if $trees.tree_method.method == "msa":
                -A '${trees.tree_method.msa_program}'
                -T '${trees.tree_method.msa_tree_program}'
            #end if
        #else:
            ## the select value is itself the command line flag (-og)
            $trees.run_mode
        #end if

        ## -t: sequence search threads, -a: analysis threads; both follow the slots granted by Galaxy
        -t \${GALAXY_SLOTS:-1} -a \${GALAXY_SLOTS:-1} &&

        ## give the dated Results_* directory a stable name for from_work_dir and discover_datasets
        mv OrthoFinder/Results_* results
        #if $init.start == "fasta":
            #if $init.search.search_program == "blast":
                #if $init.search.keepblastout:
                    ## sort blast tables and species fasta files into the two
                    ## directories scanned by the wdblast and wdfasta collections
                    && mkdir -p results/WorkingDirectory/blast results/WorkingDirectory/fa &&
                    mv results/WorkingDirectory/Blast* results/WorkingDirectory/blast/ &&
                    mv results/WorkingDirectory/*.fa results/WorkingDirectory/fa/
                #end if
            #end if
        #end if
    ]]></command>
    <inputs>
        <!-- Control where Orthofinder starts -->
        <conditional name="init">
            <!-- "fasta" runs the whole pipeline; "blast" resumes from the search results of an earlier run -->
            <param name="start" type="select" label="Orthofinder starting point" help="OrthoFinder can be run in 2 steps. Choose 'From fasta files' to run OrthoFinder from scratch or 'From blast results' if you have all the blast results from a previous OrthoFinder run.">
                <option value="fasta" selected="true">From fasta files</option>
                <option value="blast">From blast results</option>
            </param>

            <when value="fasta">
                <param name="input_fasta" type="data" format="fasta" multiple="true" label="Select input fasta files" help="One fasta file per species; species and sequence names in the results will remain the same as in the input files."/>
                <!-- the option value is pasted verbatim onto the command line: empty for proteins, -d for nucleotides -->
                <param name="input_type" type="select" label="Input contains nucleotide or amino acid sequences?">
                    <option value="">Amino acid</option>
                    <option value="-d">Nucleotide</option>
                </param>
                <conditional name="search">
                    <!-- the option value is passed to the -S option -->
                    <param name="search_program" type="select" label="Sequence search program">
                        <option value="diamond" selected="true">Diamond (faster)</option>
                        <option value="diamond_ultra_sens">Diamond ultra-sensitive</option>
                        <option value="blast">Blast</option>
                        <option value="blast_gz">Blast_gz - blast results gzipped</option>
                    </param>
                    <when value="blast">
                        <!-- when checked, the blast tables, species fasta files and both ID files become extra outputs -->
                        <param name="keepblastout" type="boolean" checked="true" label="Do you want to get the blast results?" help="Used to re-run OrthoFinder from pre-computed blast results"/>
                    </when>
                    <when value="diamond"></when>
                    <when value="diamond_ultra_sens"></when>
                    <when value="blast_gz"></when>
                </conditional>
            </when>

            <when value="blast">
                <param name="input_blast_out" type="data_collection" collection_type="list" format="txt" label="Select the pre-computed blast files" help="BlastX_Y.txt files from the blast output files of a previous OrthoFinder run." />
                <param name="input_blast_fa" type="data_collection" collection_type="list" format="fasta" label="Select the fasta files" help="SpeciesX.fa files from the blast output files of a previous OrthoFinder run." />
                <param name="specIDs" type="data" format="txt" label="Select the SpeciesIDs file" help="SpeciesIDs.txt file from the blast output files of a previous OrthoFinder run."/>
                <param name="seqIDs" type="data" format="txt" label="Select the SequenceIDs file" help="SequenceIDs.txt file from the blast output files of a previous OrthoFinder run."/>
            </when>
        </conditional>

        <conditional name="trees">
            <!-- "full" enables the tree options below; "-og" is passed to orthofinder as-is -->
            <param name="run_mode" type="select" label="Orthofinder run mode">
                <option value="full" selected="true">Full run (including gene trees)</option>
                <option value="-og">Stop after inferring orthogroups (no gene trees)</option>
            </param>

            <when value="full">
                <conditional name="tree_method">
                    <!-- value is passed to -M -->
                    <param name="method" type="select" label="Method for gene tree inference">
                        <option value="dendroblast" selected="true">Dendroblast (faster)</option>
                        <option value="msa">MSA (Multiple Sequence Alignments)</option>
                    </param>

                    <when value="msa">
                        <!-- value is passed to -A -->
                        <param name="msa_program" type="select" label="MSA program">
                            <option value="mafft" selected="true">Mafft</option>
                            <option value="muscle">Muscle</option>
                        </param>
                        <!-- value is passed to -T -->
                        <param name="msa_tree_program" type="select" label="Tree inference method">
                            <option value="fasttree" selected="true">FastTree (recommended)</option>
                            <option value="raxml">raxml</option>
                            <option value="raxml-ng">raxml-ng</option>
                            <option value="iqtree">iqtree</option>
                        </param>
                    </when>

                    <when value="dendroblast"/>
                </conditional>
            </when>

            <when value="-og"/>
        </conditional>

        <!-- forwarded to orthofinder as -I -->
        <param argument="-I" name="inflation" type="float" value="1.5" label="Inflation parameter" help="Modify inflation parameter for MCL. Not recommended." />
        <!-- not used on the command line: it only switches the duplication outputs on (together with a full run) -->
        <param name="output_duplications" type="boolean" checked="false" label="Generate output about gene duplication events"/>
    </inputs>
    <outputs>
        <!-- Orthogroups results: every dataset in this group without a filter is always created -->
        <data format="txt" name="orthogroups1" label="OrthoFinder on ${on_string}: orthogroups (txt)" from_work_dir="results/Orthogroups/Orthogroups.txt" />
        <data format="tsv" name="orthogroups2" label="OrthoFinder on ${on_string}: orthogroups (tsv)" from_work_dir="results/Orthogroups/Orthogroups.tsv" />
        <!-- hierarchical orthogroups are only collected for a full run -->
        <data format="tsv" name="hogs" label="OrthoFinder on ${on_string}: hierarchical orthogroups (tsv)" from_work_dir="results/Phylogenetic_Hierarchical_Orthogroups/N0.tsv" >
            <filter>trees['run_mode'] == "full"</filter>
        </data>
        <data format="tsv" name="specs_overlap" label="OrthoFinder on ${on_string}: species overlaps" from_work_dir="results/Comparative_Genomics_Statistics/Orthogroups_SpeciesOverlaps.tsv" />
        <data format="tsv" name="unassigned_genes" label="OrthoFinder on ${on_string}: unassigned genes" from_work_dir="results/Orthogroups/Orthogroups_UnassignedGenes.tsv" />
        <data format="tsv" name="stat_overall" label="OrthoFinder on ${on_string}: overall comparative genomics statistics" from_work_dir="results/Comparative_Genomics_Statistics/Statistics_Overall.tsv" />
        <data format="tsv" name="stat_specs" label="OrthoFinder on ${on_string}: per species comparative genomics statistics" from_work_dir="results/Comparative_Genomics_Statistics/Statistics_PerSpecies.tsv" />

        <!-- working directory : blast outputs.
             All four outputs share one filter: start from fasta, search with blast, keepblastout checked.
             The blast/ and fa/ directories are filled by the mv commands at the end of the command section. -->
        <collection name="wdblast" type="list" label="OrthoFinder on ${on_string}: blast outputs">
            <discover_datasets pattern="__name_and_ext__" directory="results/WorkingDirectory/blast" />
            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
        </collection>
        <collection name="wdfasta" type="list" label="OrthoFinder on ${on_string}: fasta from blast" >
            <!-- the element identifier is the file name without its .fa suffix -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fa" directory="results/WorkingDirectory/fa" format="fasta" />
            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
        </collection>
        <data format="txt" name="SpeciesIDs" label="OrthoFinder on ${on_string}: SpeciesIDs" from_work_dir="results/WorkingDirectory/SpeciesIDs.txt" >
            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
        </data>
        <data format="txt" name="SequenceIDs" label="OrthoFinder on ${on_string}: SequencesIDs" from_work_dir="results/WorkingDirectory/SequenceIDs.txt" >
            <filter>init['start'] == "fasta" and init['search']['search_program'] == "blast" and init['search']['keepblastout']</filter>
        </data>

        <!-- tree outputs: only for a full run -->
        <data format="newick" name="species_tree" label="OrthoFinder on ${on_string}: species tree" from_work_dir="results/Species_Tree/SpeciesTree_rooted.txt">
            <filter>trees['run_mode'] == "full"</filter>
        </data>
        <data format="newick" name="species_tree_label" label="OrthoFinder on ${on_string}: species tree with node labels" from_work_dir="results/Species_Tree/SpeciesTree_rooted_node_labels.txt">
            <filter>trees['run_mode'] == "full"</filter>
        </data>
        <collection name="genetrees" type="list" label="OrthoFinder on ${on_string}: gene trees">
            <!-- one element per .txt file in results/Gene_Trees, registered as newick -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" directory="results/Gene_Trees" format="newick" />
            <filter>trees['run_mode'] == "full"</filter>
        </collection>

        <!-- gene duplication outputs: full run and output_duplications checked -->
        <data format="newick" name="species_tree_duplications" label="OrthoFinder on ${on_string}: species tree with duplication events" from_work_dir="results/Gene_Duplication_Events/SpeciesTree_Gene_Duplications_0.5_Support.txt">
            <filter>trees['run_mode'] == "full" and output_duplications</filter>
        </data>
        <data format="tsv" name="duplications" label="OrthoFinder on ${on_string}: duplication events" from_work_dir="results/Gene_Duplication_Events/Duplications.tsv">
            <filter>trees['run_mode'] == "full" and output_duplications</filter>
        </data>
        <data format="tsv" name="duplications_per_orthogroup" label="OrthoFinder on ${on_string}: duplications per orthogroup" from_work_dir="results/Comparative_Genomics_Statistics/Duplications_per_Orthogroup.tsv">
            <filter>trees['run_mode'] == "full" and output_duplications</filter>
        </data>
        <data format="tsv" name="duplications_per_species_tree_node" label="OrthoFinder on ${on_string}: duplications per species tree node" from_work_dir="results/Comparative_Genomics_Statistics/Duplications_per_Species_Tree_Node.tsv">
            <filter>trees['run_mode'] == "full" and output_duplications</filter>
        </data>
        <collection name="resolved_trees" type="list" label="OrthoFinder on ${on_string}: resolved gene trees">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" directory="results/Resolved_Gene_Trees" format="newick" />
            <filter>trees['run_mode'] == "full" and output_duplications</filter>
        </collection>
    </outputs>
    <tests>
        <!-- no trees + diamond + input files have no extension fasta/faa/fa -->
        <!-- 6 outputs = the six unfiltered datasets: orthogroups1, orthogroups2, specs_overlap, unassigned_genes, stat_overall, stat_specs -->
        <test expect_num_outputs="6">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/no_fa_ext/AcAcaud_trinity,inputs/no_fa_ext/AmAmphi_trinity,inputs/no_fa_ext/ApApomp_trinity,inputs/no_fa_ext/AsAsp1_trinity" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
            </conditional>
            <param name="inflation" value="1.5" />
            <!-- each per-species table must mention all four input names -->
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <!-- only the row labels are checked here, not the numeric values -->
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- no trees + diamond + input files have extension fasta/faa/fa -->
        <!-- 6 outputs = the six unfiltered datasets; the asserted species names carry no .faa suffix -->
        <test expect_num_outputs="6">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <!-- only the row labels are checked here, not the numeric values -->
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- no trees + blast + input files have extension fasta/faa/fa + keep blast out -->
        <!-- 10 outputs = the six unfiltered datasets + wdblast, wdfasta, SpeciesIDs and SequenceIDs -->
        <test expect_num_outputs="10">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="blast"/>
                    <param name="keepblastout" value="true" />
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <!-- compared against the same file the from-blast test feeds in as seqIDs -->
            <output name="SequenceIDs" value="inputs/blastids/SequenceIDs.txt" />
            <!-- four species: 4 fasta files and 4 x 4 = 16 pairwise blast tables -->
            <output_collection name="wdfasta" type="list" count="4"/>
            <output_collection name="wdblast" type="list" count="16"/>
        </test>
        <!-- no trees + blast + input files have extension fasta/faa/fa, blast results not kept -->
        <!-- 6 outputs: with keepblastout false the working-directory outputs are filtered out -->
        <test expect_num_outputs="6">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="blast"/>
                    <param name="keepblastout" value="false" />
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- no trees + uploaded blast -->
        <!-- starts from pre-computed search results; the six unfiltered outputs are compared against stored files -->
        <test expect_num_outputs="6">
            <conditional name="init">
                <param name="start" value="blast" />
                <!-- element names keep the .txt / .fa extension, so the symlinks get the original file names -->
                <param name="input_blast_out">
                    <collection type="list">
                        <element name="Blast0_0.txt" value="inputs/blastout/Blast0_0.txt"/>
                        <element name="Blast0_1.txt" value="inputs/blastout/Blast0_1.txt"/>
                        <element name="Blast0_2.txt" value="inputs/blastout/Blast0_2.txt"/>
                        <element name="Blast0_3.txt" value="inputs/blastout/Blast0_3.txt"/>
                        <element name="Blast1_0.txt" value="inputs/blastout/Blast1_0.txt"/>
                        <element name="Blast1_1.txt" value="inputs/blastout/Blast1_1.txt"/>
                        <element name="Blast1_2.txt" value="inputs/blastout/Blast1_2.txt"/>
                        <element name="Blast1_3.txt" value="inputs/blastout/Blast1_3.txt"/>
                        <element name="Blast2_0.txt" value="inputs/blastout/Blast2_0.txt"/>
                        <element name="Blast2_1.txt" value="inputs/blastout/Blast2_1.txt"/>
                        <element name="Blast2_2.txt" value="inputs/blastout/Blast2_2.txt"/>
                        <element name="Blast2_3.txt" value="inputs/blastout/Blast2_3.txt"/>
                        <element name="Blast3_0.txt" value="inputs/blastout/Blast3_0.txt"/>
                        <element name="Blast3_1.txt" value="inputs/blastout/Blast3_1.txt"/>
                        <element name="Blast3_2.txt" value="inputs/blastout/Blast3_2.txt"/>
                        <element name="Blast3_3.txt" value="inputs/blastout/Blast3_3.txt"/>
                    </collection>
                </param>
                <param name="input_blast_fa">
                    <collection type="list">
                        <element name="Species0.fa" value="inputs/blastfa/Species0.fa"/>
                        <element name="Species1.fa" value="inputs/blastfa/Species1.fa"/>
                        <element name="Species2.fa" value="inputs/blastfa/Species2.fa"/>
                        <element name="Species3.fa" value="inputs/blastfa/Species3.fa"/>
                    </collection>
                </param>
                <param name="specIDs" ftype="txt" value="inputs/blastids/SpeciesIDs.txt"/>
                <param name="seqIDs" ftype="txt" value="inputs/blastids/SequenceIDs.txt"/>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="orthogroups1" value="results_fromblast/Orthogroups.txt"/>
            <output name="orthogroups2" value="results_fromblast/Orthogroups.tsv"/>
            <output name="specs_overlap" value="results_fromblast/Orthogroups_SpeciesOverlaps.tsv"/>
            <output name="unassigned_genes" value="results_fromblast/Orthogroups_UnassignedGenes.tsv"/>
            <!-- up to two differing lines are tolerated in this file -->
            <output name="stat_overall" value="results_fromblast/Statistics_Overall.tsv" lines_diff="2"/>
            <output name="stat_specs" value="results_fromblast/Statistics_PerSpecies.tsv"/>
        </test>
        <!-- full mode + diamond + input files have extension fasta/faa/fa -->
        <!-- 10 outputs = the six unfiltered datasets + hogs, species_tree, species_tree_label and genetrees -->
        <test expect_num_outputs="10">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <!-- tree_method is not set, so its selected default (dendroblast) applies -->
            <conditional name="trees">
                <param name="run_mode" value="full" />
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <!-- the tree is compared by file size only (sim_size), not by content -->
            <output name="species_tree" value="results/SpeciesTree_rooted.txt" compare="sim_size"/>
            <!-- NOTE(review): an empty gene tree collection is expected here; confirm this is intended for a full run -->
            <output_collection name="genetrees" type="list" count="0"/>
            <output name="hogs">
                <assert_contents>
                    <has_text text="HOG" />
                    <has_text text="OG" />
                    <has_text text="Gene Tree Parent Clade" />
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="7"/>
                </assert_contents>
            </output>
        </test>
        <!-- full mode + diamond + input files have extension fasta/faa/fa + duplications
             Starts from the four Mycoplasma .faa proteomes with run_mode "full", inflation 1.5 and
             output_duplications set to true; 15 outputs are expected.
             The overlap, unassigned genes, statistics and hogs outputs are checked with text and
             column-count assertions; the species tree and the duplication files are compared to
             reference files by similar size (sim_size); the genetrees and resolved_trees
             collections must each contain 325 elements. -->
        <test expect_num_outputs="15">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="full" />
            </conditional>
            <param name="inflation" value="1.5" />
            <param name="output_duplications" value="true" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="species_tree" value="results/SpeciesTree_rooted.txt" compare="sim_size"/>
            <output name="species_tree_duplications" value="results/SpeciesTree_Gene_Duplications_0.5_Support.txt" compare="sim_size"/>
            <output name="duplications" value="results/Duplications.tsv" compare="sim_size"/>
            <output name="duplications_per_orthogroup" value="results/Duplications_per_Orthogroup.tsv" compare="sim_size"/>
            <output name="duplications_per_species_tree_node" value="results/Duplications_per_Species_Tree_Node.tsv" compare="sim_size"/>
            <output_collection name="genetrees" type="list" count="325"/>
            <output_collection name="resolved_trees" type="list" count="325"/>
            <output name="hogs">
                <assert_contents>
                    <has_text text="HOG" />
                    <has_text text="OG" />
                    <has_text text="Gene Tree Parent Clade" />
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="7"/>
                </assert_contents>
            </output>
        </test>
        <!-- trees + diamond + input files have no extension fasta/faa/fa + msa
             Starts from the four *_trinity inputs under inputs/no_fa_ext (file names without a
             fasta/faa/fa extension) with inflation 1.5; 6 outputs are expected.
             The tree_method conditional selects method "msa" with msa_program "muscle" and
             msa_tree_program "raxml". Only the overlap, unassigned genes and statistics outputs
             are asserted, each by text and (where present) column-count checks.
             NOTE(review): run_mode is "-og" here, so "trees" presumably refers to the tree_method
             settings being supplied rather than to a tree-producing run mode; confirm. -->
        <test expect_num_outputs="6">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/no_fa_ext/AcAcaud_trinity,inputs/no_fa_ext/AmAmphi_trinity,inputs/no_fa_ext/ApApomp_trinity,inputs/no_fa_ext/AsAsp1_trinity" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="-og" />
                <conditional name="tree_method">
                    <param name="method" value="msa" />
                    <param name="msa_program" value="muscle" />
                    <param name="msa_tree_program" value="raxml" />
                </conditional>
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="AcAcaud_trinity"/>
                    <has_text text="AmAmphi_trinity"/>
                    <has_text text="ApApomp_trinity"/>
                    <has_text text="AsAsp1_trinity"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- full mode + diamond + input files have extension fasta/faa/fa + msa
             Starts from the four Mycoplasma .faa proteomes with run_mode "full" and inflation 1.5;
             10 outputs are expected.
             The tree_method conditional selects method "msa" with msa_program "muscle" and
             msa_tree_program "raxml". Besides the text and column-count assertions on the overlap,
             unassigned genes, statistics and hogs outputs, assert_command verifies that the
             generated command line contains -M 'msa', -A 'muscle' and -T 'raxml'. -->
        <test expect_num_outputs="10">
            <conditional name="init">
                <param name="start" value="fasta" />
                <param name="input_fasta" ftype="fasta" value="inputs/proteomes/Mycoplasma_agalactiae.faa,inputs/proteomes/Mycoplasma_gallisepticum.faa,inputs/proteomes/Mycoplasma_genitalium.faa,inputs/proteomes/Mycoplasma_hyopneumoniae.faa" />
                <conditional name="search">
                    <param name="search_program" value="diamond"/>
                </conditional>
            </conditional>
            <conditional name="trees">
                <param name="run_mode" value="full" />
                <conditional name="tree_method">
                    <param name="method" value="msa" />
                    <param name="msa_program" value="muscle" />
                    <param name="msa_tree_program" value="raxml" />
                </conditional>
            </conditional>
            <param name="inflation" value="1.5" />
            <output name="specs_overlap">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="unassigned_genes">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="stat_overall">
                <assert_contents>
                    <has_text text="Number of genes in orthogroups"/>
                    <has_text text="Number of unassigned genes"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_text text="G50 (assigned genes)"/>
                    <has_text text="G50 (all genes)"/>
                    <has_text text="O50 (assigned genes)"/>
                    <has_text text="O50 (all genes)"/>
                </assert_contents>
            </output>
            <output name="stat_specs">
                <assert_contents>
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_text text="Number of genes per-species in orthogroup"/>
                    <has_text text="Percentage of orthogroups"/>
                    <has_text text="Number of orthogroups"/>
                    <has_text text="Number of genes"/>
                    <has_n_columns n="5"/>
                </assert_contents>
            </output>
            <output name="hogs">
                <assert_contents>
                    <has_text text="HOG" />
                    <has_text text="OG" />
                    <has_text text="Gene Tree Parent Clade" />
                    <has_text text="Mycoplasma_agalactiae"/>
                    <has_text text="Mycoplasma_gallisepticum"/>
                    <has_text text="Mycoplasma_genitalium"/>
                    <has_text text="Mycoplasma_hyopneumoniae"/>
                    <has_n_columns n="7"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="-M 'msa'"/>
                <has_text text="-A 'muscle'"/>
                <has_text text="-T 'raxml'"/>
            </assert_command>
        </test>
    </tests>
    <help>
======================
OrthoFinder OnlyGroups
======================

Full readme at https://github.com/davidemms/OrthoFinder/blob/master/README.md
Summary sketch at https://github.com/davidemms/OrthoFinder/blob/master/OrthoFinder-manual.pdf

OrthoFinder is a fast, accurate and comprehensive analysis tool for comparative genomics. It finds orthologues and orthogroups, infers gene trees for all orthogroups, and infers a rooted species tree for the species being analysed. OrthoFinder also provides comprehensive statistics for comparative genomic analyses. OrthoFinder is simple to use and all you need to run it is a set of protein sequence files (one per species) in FASTA format (Emms, D.M. and Kelly, S., 2015).

.. class:: infomark

This Galaxy tool implements the first part of the OrthoFinder program, i.e. the clustering of genes into orthogroups.

If you have already run OrthoFinder, the tool allows you to re-run the analysis from the pre-computed blast results.

-----------
Input files
-----------
    - When using "from fasta" option (i.e. OrthoFinder from scratch) : the input files are a set of proteomes in fasta format (one file per species). Choose this option if you have no OrthoFinder results yet.
    - When using "from blast results" option : the input files are all the following files from a previous OrthoFinder run (these files appear only if you have chosen to keep them while launching a previous run):
        - A dataset collection / multiple datasets for the blast outputs
        - A dataset collection / multiple datasets for .fa files
        - The SpeciesIDs.txt file
        - The SequenceIDs.txt file

----------
Parameters
----------
    - Sequence search program : You can choose either blast, blast_gz, diamond, or diamond ultra-sensitive (diamond is faster)
    - Get the blast results : Check "Yes" if, while using blast as the sequence search program, you want to retrieve the blast output files
    - Inflation : the inflation parameter; modifying this parameter is not recommended.

    </help>
    <expand macro="citations"/>
</tool>