view make_EAR.xml @ 3:3dd6be0cd8dd draft default tip

planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
author bgruening
date Tue, 15 Oct 2024 12:52:59 +0000
parents a34826ae0a73
children
line wrap: on
line source

<tool id="make_ear" name="ERGA Assembly Reporting Tool (EAR)" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
    <description>A tool to compile assembly reports and statistics from assembly pipeline</description>
    <!--
        macros.xml is not visible from this file. The @TOOL_VERSION@, @VERSION_SUFFIX@ and
        @PROFILE@ tokens used on the <tool> element, and the "methods_tests", "citations"
        and "creator" macros expanded further down, are presumably defined there
        (TODO confirm against macros.xml).
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <!--
            Version-pinned packages declared for the job that runs make_EAR.py.
            NOTE(review): no explicit "python" package is listed although the command
            invokes "python"; the interpreter is presumably pulled in as a dependency
            of these packages. Confirm.
        -->
        <requirement type="package" version="4.1.0">reportlab</requirement>
        <requirement type="package" version="24.4.0">pyaml</requirement>
        <requirement type="package" version="2024.1">pytz</requirement>
        <requirement type="package" version="2.32.3">requests</requirement>
    </requirements>
    <!--
        Runs make_EAR.py from the tool directory with the rendered "param_file"
        configfile as its single argument and redirects standard output to the "log"
        output dataset. Job failure is detected from the exit code.
        NOTE(review): the "log" output also declares from_work_dir="EAR.log". If the
        script writes EAR.log into the working directory, that file presumably ends up
        as the dataset content instead of the redirected stdout. Confirm which of the
        two is intended.
    -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/make_EAR.py' '$param_file' > '$log' 
    ]]></command>
    <configfiles>
        <!--
            YAML parameter file passed to make_EAR.py as its only argument.
            The CDATA content is a Cheetah template: "#if" / "#for" / "#end" lines are
            directives, "##" lines are template comments that are not written to the
            rendered file, and the remaining "# ..." lines are emitted as YAML comments.
        -->
        <configfile name="param_file"><![CDATA[
# SAMPLE INFORMATION
ToLID: '${sample_information.tolid}'
Species: '${sample_information.species_name}'
Sex: '${sample_information.species_sex}'
Submitter: '${sample_information.submitter}'
Affiliation: '${sample_information.affiliation}'
Tags: '${sample_information.tags}'

# SEQUENCING DATA
DATA:
    ## One YAML list item per "seq_data_info" repeat instance.
    #for $repeat in $sequencing_data.seq_data_info:
    - ${repeat.seq_data}
    #end for

# GENOME PROFILING DATA
PROFILING:
    GenomeScope:
        genomescope_summary_txt: '${genome_profiling_data.genome_scope_summary}'
## The Smudgeplot summary is an optional input; only emit the key when a dataset
## was actually selected, instead of rendering the placeholder text of an unset input.
## NOTE(review): assumes make_EAR.py accepts a PROFILING section without Smudgeplot.
#if $genome_profiling_data.smudge_plot_summary
    Smudgeplot:
        smudgeplot_verbose_summary_txt: '${genome_profiling_data.smudge_plot_summary}'
#end if

# ASSEMBLY DATA
ASSEMBLIES:
    Pre-curation:
        '${pre_curation_assembly_data.haplotype_selection}':
            gfastats--nstar-report_txt: '${pre_curation_assembly_data.gfstats_nstar_report_precuration}'
            busco_short_summary_txt: '${pre_curation_assembly_data.busco_short_summary_precuration}'
            merqury_qv: '${pre_curation_assembly_data.mercury_qv_precuration}'
            merqury_completeness_stats: '${pre_curation_assembly_data.merqury_completeness_stats_precuration}'
## Each hap2 block is gated on the selector of its OWN conditional, so the template
## never reads parameters that belong to a "when" branch the user did not select.
#if $pre_curation_assembly_data.hap2_precuration_data.hap2_exists_precuration == "yes"
        hap2:
            gfastats--nstar-report_txt: '${pre_curation_assembly_data.hap2_precuration_data.gfstats_nstar_report_hap2_precuration}'
            busco_short_summary_txt: '${pre_curation_assembly_data.hap2_precuration_data.busco_short_summary_hap2_precuration}'
            merqury_qv: '${pre_curation_assembly_data.hap2_precuration_data.mercury_qv_hap2_precuration}'
            merqury_completeness_stats: '${pre_curation_assembly_data.hap2_precuration_data.merqury_completeness_stats_hap2_precuration}'
#end if

    Curated:
        ## The curated primary haplotype reuses the label chosen in the pre-curation section.
        '${pre_curation_assembly_data.haplotype_selection}':
            gfastats--nstar-report_txt: '${curated_assembly_data.gfstats_nstar_report_curated}'
            busco_short_summary_txt: '${curated_assembly_data.busco_short_summary_curated}'
            merqury_qv: '${curated_assembly_data.mercury_qv_curated}'
            merqury_completeness_stats: '${curated_assembly_data.merqury_completeness_stats_curated}'
            merqury_hap_spectra_cn_png: '${curated_assembly_data.merqury_hap_spectra_cn_curated}'
            merqury_spectra_cn_png: '${curated_assembly_data.merqury_spectra_cn_curated}'
            merqury_spectra_asm_png: '${curated_assembly_data.merqury_spectra_asm_curated}'
            hic_FullMap_png: '${curated_assembly_data.hic_FullMap_curated}'
            hic_FullMap_link: '${curated_assembly_data.hic_FullMap_link_curated}'
            blobplot_cont_png: '${curated_assembly_data.blobplot_cont_curated}'
#if $curated_assembly_data.hap2_curated_data.hap2_exists_curated == "yes"
        hap2:
            gfastats--nstar-report_txt: '${curated_assembly_data.hap2_curated_data.gfstats_nstar_report_hap2_curated}'
            busco_short_summary_txt: '${curated_assembly_data.hap2_curated_data.busco_short_summary_hap2_curated}'
            merqury_qv: '${curated_assembly_data.hap2_curated_data.mercury_qv_hap2_curated}'
            merqury_completeness_stats: '${curated_assembly_data.hap2_curated_data.merqury_completeness_stats_hap2_curated}'
            merqury_hap_spectra_cn_png: '${curated_assembly_data.hap2_curated_data.merqury_hap_spectra_cn_hap2_curated}'
            merqury_spectra_cn_png: '${curated_assembly_data.hap2_curated_data.merqury_spectra_cn_hap2_curated}'
            merqury_spectra_asm_png: '${curated_assembly_data.hap2_curated_data.merqury_spectra_asm_hap2_curated}'
            hic_FullMap_png: '${curated_assembly_data.hap2_curated_data.hic_FullMap_hap2_curated}'
            hic_FullMap_link: '${curated_assembly_data.hap2_curated_data.hic_FullMap_link_hap2_curated}'
            blobplot_cont_png: '${curated_assembly_data.hap2_curated_data.blobplot_cont_hap2_curated}'
#end if

# METHODS DATA
PIPELINES:
  Assembly:
    ## Each repeat value is written verbatim as one line under its heading.
    #for $repeat in $method_data.assembly_method_info:
    ${repeat.assembly_tools_info}
    #end for

  Curation:
    #for $repeat in $method_data.curation_method_info:
    ${repeat.curation_tools_info}
    #end for

# CURATION NOTES
NOTES:
    Obs_Haploid_num: '${curation_notes.obs_haploid_num}'
    Obs_Sex: '${curation_notes.obs_sex}'
    Interventions_per_Gb: '${curation_notes.interventions_per_gb}'
    Contamination_notes: '${curation_notes.contam_notes}'
    Other_notes: '${curation_notes.other_notes}'

        ]]></configfile>
    </configfiles>

    <inputs>
        <!--
            Sample Information: free-text identifiers plus two fixed-choice selects.
            The values are written to the ToLID, Species, Sex, Submitter, Affiliation and
            Tags keys at the top of the "param_file" configfile.
        -->
        <section name="sample_information" title="Sample Information"  expanded="true">
            <param name="tolid" type="text" optional="False" value="" label="Input a ToLID"/>
            <param name="species_name" type="text" optional="False" value="" label="Enter the Species name"/>
            <param name="species_sex" type="select" label="Enter the Species sex">
                <option value="XX">XX</option>
                <option value="XY">XY</option>
                <option value="X0">X0</option>
                <option value="ZZ">ZZ</option>
                <option value="ZW">ZW</option>
                <option value="NA">NA</option>
            </param>
            <param name="submitter" type="text" value="" optional="False" label="Enter the Submitter name"/>
            <param name="affiliation" type="text" value="" optional="False" label="Enter the Affiliation"/>
            <!-- ERGA-BGE is the preselected tag. -->
            <param name="tags" type="select" label="Select a valid tag"> 
                <option value="ERGA-BGE" selected="true">ERGA-BGE</option>
                <option value="ERGA-Pilot">ERGA-Pilot</option>
                <option value="ERGA-Community">ERGA-Community</option>
            </param>
        </section>

        <!--
            Sequencing Data Information: a repeat of free-text entries (at least one,
            two shown by default). Each entry becomes one list item under the DATA key
            of the "param_file" configfile.
        -->
        <section name="sequencing_data" title="Sequencing Data Information">
            <repeat name='seq_data_info' title="Add Sequence Data information" min="1" default="2">
                <param name="seq_data" type="text" label="Enter Sequencing data information" optional="False" help="Input Sequencing data information along with coverage scores if available. (Example Input: HiFi: 40X)"/>
            </repeat>
        </section>

        <!--
            Genome Profiling Information: text summaries that feed the PROFILING section
            of the "param_file" configfile. The GenomeScope summary has no optional flag;
            the Smudgeplot summary is declared optional.
        -->
        <section name="genome_profiling_data" title="Genome Profiling Information">
            <param name="genome_scope_summary" type="data" format="txt" label="Summary file from GenomeScope tool"/>
            <param name="smudge_plot_summary" type="data" format="txt" optional="true" label="Summary file from Smudge Plot tool"/>
        </section>

        <!--
            Pre-curation assembly inputs. "haplotype_selection" is the label of the
            primary haplotype; the "param_file" configfile uses it as the YAML key for
            both the Pre-curation and the Curated section. The four report datasets are
            always required. The hap2 reports only exist when the conditional selector
            is set to "yes".
        -->
        <section name="pre_curation_assembly_data" title="Pre-Curation Assembly Information">
            <param name="haplotype_selection" type="select" label="Select a valid haplotype">
                <option value="hap1">hap1</option>
                <option value="pri">pri</option>
                <option value="collapsed">collapsed</option>
            </param>
            <param name="gfstats_nstar_report_precuration" type="data" format="txt" label="Select gfastats--nstar report file from gfastats tool"/>
            <param name="busco_short_summary_precuration" type="data" format="txt" label="Select short summary report file from busco tool"/>
            <param name="mercury_qv_precuration" type="data" format="txt" label="Select Merqury results .qv file"/>
            <param name="merqury_completeness_stats_precuration" type="data" format="txt" label="Select Merqury results completeness.stats file"/>
            <conditional name="hap2_precuration_data">
                <param name="hap2_exists_precuration" type="select" label="Do you have data for Hap2?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <!-- Same txt format as the hap1 report above (previously the generic "data" format). -->
                    <param name="gfstats_nstar_report_hap2_precuration" type="data" format="txt" label="Select gfastats--nstar report file from gfastats tool for Hap2"/>
                    <param name="busco_short_summary_hap2_precuration" type="data" format="txt" label="Select short summary from busco tool for Hap2"/>
                    <param name="mercury_qv_hap2_precuration" type="data" format="txt" label="Select Merqury results .qv file for Hap2"/>
                    <param name="merqury_completeness_stats_hap2_precuration" type="data" format="txt" label="Select Merqury results completeness.stats file for Hap2"/>
                </when>
            </conditional>
        </section>
        
        <!--
            Curated assembly inputs: text reports, PNG plots and a free-text link to the
            .pretext file for the primary haplotype, plus the same set for hap2 when the
            conditional selector is set to "yes".
        -->
        <section name="curated_assembly_data" title="Curated Assembly Data Information">
            <param name="gfstats_nstar_report_curated" type="data" format="txt" label="Select curated gfastats--nstar report file"/>
            <param name="busco_short_summary_curated" type="data" format="txt" label="Select curated busco_short_summary.txt file"/>
            <param name="mercury_qv_curated" type="data" format="txt" label="Select curated Merqury results .qv file"/>
            <param name="merqury_completeness_stats_curated" type="data" format="txt" label="Select curated Merqury completeness.stats file"/>
            <param name="merqury_hap_spectra_cn_curated" type="data" format="png" label="Select Merqury HAP1 spectra-cn.ln.png file"/>
            <param name="merqury_spectra_cn_curated" type="data" format="png" label="Select Merqury results spectra-cn.ln.png file"/>
            <param name="merqury_spectra_asm_curated" type="data" format="png" label="Select Merqury results spectra-asm.ln.png file"/>
            <param name="hic_FullMap_curated" type="data" format="png" label="Select pretext FullMap plot"/>
            <param name="hic_FullMap_link_curated" type="text" value="" label="Insert .pretext file web link" help="This can be copied from the dataset-download-button"/>
            <param name="blobplot_cont_curated" type="data" format="png" label="Select blobplot contamination plot file"/>

            <conditional name="hap2_curated_data">
                <param name="hap2_exists_curated" type="select" label="Do you have data for Hap2?">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="no"/>
                <when value="yes">
                    <param name="gfstats_nstar_report_hap2_curated" type="data" format="txt" label="Select curated gfastats--nstar report file for hap2"/>
                    <param name="busco_short_summary_hap2_curated" type="data" format="txt" label="Select curated busco_short_summary.txt file for hap2"/>
                    <param name="mercury_qv_hap2_curated" type="data" format="txt" label="Select curated Merqury results .qv file for hap2"/>
                    <param name="merqury_completeness_stats_hap2_curated" type="data" format="txt" label="Select curated Merqury results completeness.stats file for hap2"/>
                    <!--
                        Labels mirror the primary-haplotype pair above: the per-haplotype
                        plot is the HAP2 one here, the other is the generic spectra-cn plot.
                    -->
                    <param name="merqury_hap_spectra_cn_hap2_curated" type="data" format="png" label="Select Merqury HAP2 spectra-cn.ln.png file"/>
                    <param name="merqury_spectra_cn_hap2_curated" type="data" format="png" label="Select Merqury results spectra-cn.ln.png file for hap2"/>
                    <param name="merqury_spectra_asm_hap2_curated" type="data" format="png" label="Select curated Merqury results spectra-asm.ln.png file for hap2"/>
                    <param name="hic_FullMap_hap2_curated" type="data" format="png" label="Select pretext FullMap plot for hap2"/>
                    <param name="hic_FullMap_link_hap2_curated" type="text" value="" label="Insert .pretext file web link for hap2" help="This can be copied from the dataset-download-button"/>
                    <param name="blobplot_cont_hap2_curated" type="data" format="png" label="Select blobplot contamination plot for hap2"/>
                </when>
            </conditional>
        </section>
        
        <!--
            Method Information: two repeats of free-text entries (at least one each, two
            shown by default). Each entry is written verbatim as one line under
            PIPELINES / Assembly or PIPELINES / Curation in the "param_file" configfile.
        -->
        <section name="method_data" title="Method Information">
            <repeat name="assembly_method_info" title="Specify the method used for Assembly" min="1" default="2">
                <param name="assembly_tools_info" type="text" label="Input tool names along with version and parameters used for assembly" optional="False" help="Specify ToolName:Tool_Version/Tool_parameters"/>
            </repeat>
            <repeat name="curation_method_info" title="Specify the method used for Curation" min="1" default="2">
                <param name="curation_tools_info" type="text" label="Input tool names along with version and parameters used for curation" optional="False" help="Specify ToolName:Tool_Version/Tool_parameters"/>
            </repeat>
        </section>

        <!--
            Curation Notes: values written to the NOTES section of the "param_file"
            configfile (Obs_Haploid_num, Obs_Sex, Interventions_per_Gb,
            Contamination_notes, Other_notes). The observed sex select offers the same
            choices as the species sex select in the Sample Information section.
        -->
        <section name="curation_notes" title="Curation Notes">
            <param name="obs_haploid_num" type="text" optional="False" label="Insert observed haploid number"/>
            <param name="obs_sex" type="select" label="Select observed sex" help="Example: XX, XY, X0, ZZ, ZW, NA">
                <option value="XX">XX</option>
                <option value="XY">XY</option>
                <option value="X0">X0</option>
                <option value="ZZ">ZZ</option>
                <option value="ZW">ZW</option>
                <option value="NA">NA</option>
            </param>
            <param name="interventions_per_gb" type="text" label="Insert manual intervention during curation (GB)"/>
            <param name="contam_notes" type="text" label="Insert contamination notes"/>
            <param name="other_notes" type="text" label="Insert Other notes"/>
        </section>
    </inputs>

    <outputs>
        <!--
            Both outputs are collected from files in the job working directory:
            EAR.pdf for the report and EAR.log for the log.
            NOTE(review): the command line also redirects stdout to the "log" dataset;
            confirm whether EAR.log or the redirected stdout is the intended content.
        -->
        <data name="EAR_pdf" format="pdf" from_work_dir="EAR.pdf" label="${tool.name} on ${on_string}: Output PDF" ></data> 
        <data name="log" format="txt" from_work_dir="EAR.log" label="${tool.name} on ${on_string}: Log file"/>
    </outputs>
    <tests>
        <!--
            Test 1: single haplotype. Both hap2 conditionals are set to "no".
            The PDF is compared by approximate size (sim_size) against EAR.pdf and the
            log output is compared against EAR_log. The method_data inputs come from the
            "methods_tests" macro, which is not defined in this file.
        -->
        <test expect_num_outputs="2">
            <section name="sample_information">
                <param name="tolid" value="xgPhyFlav1"/>
                <param name="species_name" value="Phyllidia flava"/>
                <param name="species_sex" value="XX"/>
                <param name="submitter" value="John Doe"/>
                <param name="affiliation" value="Galaxy EU"/>
                <param name="tags" value="ERGA-BGE"/>
            </section>
            <section name="sequencing_data">
                <repeat name="seq_data_info">
                    <param name="seq_data" value="HiFi: 40x"/>
                </repeat>
                <repeat name="seq_data_info">
                    <param name="seq_data" value="Bionano: 10x"/>
                </repeat>
                <repeat name="seq_data_info">
                    <param name="seq_data" value="OmniC: 90x"/>
                </repeat>
            </section>
            <section name="genome_profiling_data">
                <param name="genome_scope_summary" value="genomescope_results_summary.txt"/>
                <param name="smudge_plot_summary" value="smudgeplot_verbose_summary.txt"/>
            </section>
            <section name="pre_curation_assembly_data">
                <param name="haplotype_selection" value="hap1"/>
                <param name="gfstats_nstar_report_precuration" value="ele_pre.asm1_gfastats.txt"/>
                <param name="busco_short_summary_precuration" value="short_summary.specific.mammalia_odb10.pre.asm1.txt"/>
                <param name="mercury_qv_precuration" value="ele_pre_merqOutput.qv"/>
                <param name="merqury_completeness_stats_precuration" value="ele_pre_merqOutput.completeness.stats"/>
                <conditional name="hap2_precuration_data">
                    <param name="hap2_exists_precuration" value="no"/>
                </conditional>
            </section>
            <section name="curated_assembly_data">
                <param name="gfstats_nstar_report_curated" value="ele.asm1_post_gfastats.txt"/>
                <param name="busco_short_summary_curated" value="short_summary.specific.mammalia_odb10.post.asm1.txt"/>
                <param name="mercury_qv_curated" value="ele_post_merqOutput.qv"/>
                <param name="merqury_completeness_stats_curated" value="ele_post_merqOutput.completeness.stats"/>
                <param name="merqury_hap_spectra_cn_curated" value="ele_post_merqOutput.ele_post.asm1.spectra-cn.ln.png"/>
                <param name="merqury_spectra_cn_curated" value="ele_post_merqOutput.spectra-cn.ln.png"/>
                <param name="merqury_spectra_asm_curated" value="ele_post_merqOutput.spectra-asm.ln.png"/>
                <param name="hic_FullMap_curated" value="pretext_snake1.png"/>
                <param name="hic_FullMap_link_curated" value="https://box.fu-berlin.de/apps/files/something"/>
                <param name="blobplot_cont_curated" value="blob1.png"/>
                <conditional name="hap2_curated_data">
                    <param name="hap2_exists_curated" value="no"/>
                </conditional>
            </section>
            <expand macro="methods_tests"></expand>
            <section name="curation_notes">
                <param name="obs_haploid_num" value="28"/>
                <param name="obs_sex" value="XX"/>
                <param name="interventions_per_gb" value="2"/>
                <param name="contam_notes" value="No presence of contaminants. Mitochondrial genome was removed from the assembly"/>
                <param name="other_notes" value="Large collapsed repeat in chr5, haplotypic inversion in chr12"/>
            </section>
            <output name="EAR_pdf" file="EAR.pdf" ftype="pdf" compare="sim_size"/>
            <output name="log" file="EAR_log"/>    
        </test>
        <!--
            Test 2: two haplotypes. Both hap2 conditionals are set to "yes".
            Only the PDF is compared (approximate size, sim_size, against EAR_2.pdf);
            there is no assertion on the log output. The hap2 Merqury .qv,
            completeness.stats and generic spectra plots reuse the same test files as
            the primary haplotype.
        -->
        <test expect_num_outputs="2">
            <section name="sample_information">
                <param name="tolid" value="xgPhyFlav1"/>
                <param name="species_name" value="Phyllidia flava"/>
                <param name="species_sex" value="XX"/>
                <param name="submitter" value="John Doe"/>
                <param name="affiliation" value="Galaxy EU"/>
                <param name="tags" value="ERGA-BGE"/>
            </section>
            <section name="sequencing_data">
                <repeat name="seq_data_info">
                    <param name="seq_data" value="HiFi: 40x"/>
                </repeat>
                <repeat name="seq_data_info">
                    <param name="seq_data" value="Bionano: 10x"/>
                </repeat>
                <repeat name="seq_data_info">
                    <param name="seq_data" value="OmniC: 90x"/>
                </repeat>
            </section>
            <section name="genome_profiling_data">
                <param name="genome_scope_summary" value="genomescope_results_summary.txt"/>
                <param name="smudge_plot_summary" value="smudgeplot_verbose_summary.txt"/>
            </section>
            <section name="pre_curation_assembly_data">
                <param name="haplotype_selection" value="hap1"/>
                <param name="gfstats_nstar_report_precuration" value="ele_pre.asm1_gfastats.txt"/>
                <param name="busco_short_summary_precuration" value="short_summary.specific.mammalia_odb10.pre.asm1.txt"/>
                <param name="mercury_qv_precuration" value="ele_pre_merqOutput.qv"/>
                <param name="merqury_completeness_stats_precuration" value="ele_pre_merqOutput.completeness.stats"/>
                <conditional name="hap2_precuration_data">
                    <param name="hap2_exists_precuration" value="yes"/>
                    <param name="gfstats_nstar_report_hap2_precuration" value="ele_pre.asm2_gfastats.txt"/>
                    <param name="busco_short_summary_hap2_precuration" value="short_summary.specific.mammalia_odb10.pre.asm2.txt"/>
                    <param name="mercury_qv_hap2_precuration" value="ele_pre_merqOutput.qv"/>
                    <param name="merqury_completeness_stats_hap2_precuration" value="ele_pre_merqOutput.completeness.stats"/>
                </conditional>
            </section>
            <section name="curated_assembly_data">
                <param name="gfstats_nstar_report_curated" value="ele.asm1_post_gfastats.txt"/>
                <param name="busco_short_summary_curated" value="short_summary.specific.mammalia_odb10.post.asm1.txt"/>
                <param name="mercury_qv_curated" value="ele_post_merqOutput.qv"/>
                <param name="merqury_completeness_stats_curated" value="ele_post_merqOutput.completeness.stats"/>
                <param name="merqury_hap_spectra_cn_curated" value="ele_post_merqOutput.ele_post.asm1.spectra-cn.ln.png"/>
                <param name="merqury_spectra_cn_curated" value="ele_post_merqOutput.spectra-cn.ln.png"/>
                <param name="merqury_spectra_asm_curated" value="ele_post_merqOutput.spectra-asm.ln.png"/>
                <param name="hic_FullMap_curated" value="pretext_snake1.png"/>
                <param name="hic_FullMap_link_curated" value="https://box.fu-berlin.de/apps/files/something"/>
                <param name="blobplot_cont_curated" value="blob1.png"/>
                <conditional name="hap2_curated_data">
                    <param name="hap2_exists_curated" value="yes"/>
                    <param name="gfstats_nstar_report_hap2_curated" value="ele.asm2_post_gfastats.txt"/>
                    <param name="busco_short_summary_hap2_curated" value="short_summary.specific.mammalia_odb10.post.asm2.txt"/>
                    <param name="mercury_qv_hap2_curated" value="ele_post_merqOutput.qv"/>
                    <param name="merqury_completeness_stats_hap2_curated" value="ele_post_merqOutput.completeness.stats"/>
                    <param name="merqury_hap_spectra_cn_hap2_curated" value="ele_post_merqOutput.ele_post.asm2.spectra-cn.ln.png"/>
                    <param name="merqury_spectra_cn_hap2_curated" value="ele_post_merqOutput.spectra-cn.ln.png"/>
                    <param name="merqury_spectra_asm_hap2_curated" value="ele_post_merqOutput.spectra-asm.ln.png"/>
                    <param name="hic_FullMap_hap2_curated" value="pretext_snake2.png"/>
                    <param name="hic_FullMap_link_hap2_curated" value="https://box.fu-berlin.de/apps/files/something"/>
                    <param name="blobplot_cont_hap2_curated" value="blob2.png"/>
                </conditional>
            </section>
            <expand macro="methods_tests"></expand>
            <section name="curation_notes">
                <param name="obs_haploid_num" value="28"/>
                <param name="obs_sex" value="XX"/>
                <param name="interventions_per_gb" value="2"/>
                <param name="contam_notes" value="No presence of contaminants. Mitochondrial genome was removed from the assembly"/>
                <param name="other_notes" value="Large collapsed repeat in chr5, haplotypic inversion in chr12"/>
            </section>
            <output name="EAR_pdf" file="EAR_2.pdf" ftype="pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**What it does**

**ERGA Assembly Reporting Tool (EAR)** collects the statistics and reports generated by assembly pipeline tools and compiles them into a single summarized PDF document for reporting.
    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>