view variant.xml @ 11:2bf63b38ee9b draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/medaka commit 69eb0054f76057436094943f262a74c982d8de42"
author iuc
date Sun, 12 Sep 2021 20:35:26 +0000
parents 9fb055604648
children 0f5f4a208660
line wrap: on
line source

<?xml version="1.0"?>
<tool id="medaka_variant" name="medaka variant tool" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>Probability decoding</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>

    <expand macro="version_command"/>

    <configfiles>
        <configfile name="convert_fasta">
import sys
infile = open(sys.argv[1], 'r')
outfile = open(sys.argv[2], 'w')
for line in infile:
    if line[0] == '>':
        outfile.write(line)
    else:
        outfile.write(line.upper())
infile.close()
outfile.close()
        </configfile>
    </configfiles>

    <command detect_errors="exit_code"><![CDATA[
## initialize
@REF_FASTA@


#if $pool.pool_mode == "Yes":
    ## run
    medaka variant
    ## optional
    --debug
    #if $regions
        --regions '${regions}'
    #end if
    $verbose
    ${ambig_ref}
    ${gvcf}
    ## required
    reference.fa
    #for $current in $pool.inputs
        '$current'
    #end for
    '$out_result' ## output
    2>&1 | tee '$out_log'
#elif $pool.pool_mode == "No":
    ## run
    medaka variant
    ## optional
    --debug
    #if $regions
        --regions '${regions}'
    #end if
    $verbose
    ${ambig_ref}
    ${gvcf}
    ## required
    reference.fa
    '$pool.input'
    '$out_result' ##output
    2>&1 | tee '$out_log'
#end if
#if $out_annotated:
    ## medaka annotate errors out if the reference is lower case at a position it's annotating because it checks vs the ref base in the vcf
    && python '$convert_fasta' reference.fa  upper_reference.fa
    && ln -s '$output_annotated.in_bam' in.bam
    && ln -s '$output_annotated.in_bam.metadata.bam_index' in.bai
    && medaka tools annotate --dpsp --pad $output_annotated.pad '$out_result' upper_reference.fa in.bam tmp.vcf
    && python '$__tool_directory__/convert_VCF_info_fields.py' tmp.vcf '$out_annotated'
#end if
    ]]></command>
    <inputs>
        <conditional name="pool">
            <param name="pool_mode"  type="select" label="Are you pooling HDF5 datasets?">
                <option value="No" selected="true">No</option>
                <option value="Yes">Yes</option>
            </param>
            <when value="Yes">
                <param name="inputs" type="data" format="h5" multiple="true" label="Select consensus file(s)"/>
            </when>
            <when value="No">
                <param name="input" type="data" format="h5" label="Select consensus file(s)"/>
            </when>
        </conditional>
        <expand macro="reference"/>
        <param argument="--regions" type="text" value="" optional="true" label="Set reference names to limit variant calling" help="Separated by ','.">
            <sanitizer invalid_char="">
                <valid initial="string.ascii_letters,string.digits">
                    <add value="_"/>
                    <add value=","/>
                    <add value="."/>
                </valid>
            </sanitizer>
        </param>
        <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" label="Populate VCF info fields?"/>
        <param argument="--ambig_ref" type="boolean" truevalue="--ambig_ref" falsevalue="" label="Decode variants at ambiguous reference positions?" checked="false"/>
        <param argument="--gvcf" type="boolean" truevalue="--gvcf" falsevalue="" label="Output VCF records for reference loci predicted to be non-variant?" checked="false"/>
        <conditional name="output_annotated">
            <param name="output_annotated_select" type="select" label="Output annotated VCF?" help="Annotate allele frequency, depth of coverage, etc for each variant (requires BAM file)">
                <option value="true" selected="true">Output annotated VCF</option>
                <option value="false">Don't output annotated VCF</option>
            </param>
            <when value="true">
                <param name="in_bam" type="data" format="bam" optional="false" label="BAM to annotate the VCF"/>
                <param name="pad" type="integer" min="1" value="25" label="Padding width on either side of variant for realignment in medaka tools anntotate, used to calculate DPSP, DPSPS, AF, FAF, RAF, SB, DP4, and AS in the output annotated VCF"/>
            </when>
            <when value="false"/>
        </conditional>
        <param name="output_log_bool" type="boolean" label="Output log file?" checked="true"/>
    </inputs>
    <outputs>
        <!-- standard -->
        <data name="out_result" format="vcf" label="${tool.name} on ${on_string}: Result"/>
        <!-- optional -->
        <data name="out_annotated" format="vcf" label="${tool.name} on ${on_string}: Annotated">
            <filter>output_annotated['output_annotated_select']!='false'</filter>
        </data>
        <data name="out_log" format="tabular" label="${tool.name} on ${on_string}: Log">
            <filter>output_log_bool</filter>
        </data>
    </outputs>
    <tests>
        <!-- #1 default -->
        <test expect_num_outputs="3">
            <conditional name="pool">
                <param name="pool_mode" value="Yes"/>
                <param name="inputs" value="medaka_test.hdf,medaka_test.hdf"/>
            </conditional>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="ref.fasta.gz"/>
            </conditional>
            <param name="ambig_ref" value="true"/>
            <conditional name="output_annotated">
                <param name="output_annotated_select" value="true"/>
                <param name="in_bam" value="medaka_test.bam"/>
            </conditional>
            <param name="output_log_bool" value="true"/>
            
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="9"/>
                    <has_line line="##fileformat=VCFv4.1" />
                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                </assert_contents>
            </output>
            <output name="out_annotated">
                <assert_contents>
                    <has_n_lines n="23"/>
                    <has_line line="##fileformat=VCFv4.1" />
                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                    <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\.[0-9]+" />
                </assert_contents>
            </output>
            <output name="out_log">
                <assert_contents>
                    <has_n_lines n="10" />
                </assert_contents>
            </output>
        </test>
        <!--No pooling-->
        <test expect_num_outputs="3">
            <conditional name="pool">
                <param name="pool_mode" value="No"/>
                <param name="input" value="medaka_test.hdf"/>
            </conditional>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="ref.fasta.gz" ftype="fasta.gz"/>
            </conditional>
            <param name="ambig_ref" value="true"/>
            <conditional name="output_annotated">
                <param name="output_annotated_select" value="true"/>
                <param name="in_bam" value="medaka_test.bam"/>
            </conditional>
            <param name="output_log_bool" value="true"/>
            
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="9"/>
                    <has_line line="##fileformat=VCFv4.1" />
                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                </assert_contents>
            </output>
            <output name="out_annotated">
                <assert_contents>
                    <has_n_lines n="23"/>
                    <has_line line="##fileformat=VCFv4.1" />
                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                    <has_line_matching expression="##convert_VCF_info_fields=[0-9]+\.[0-9]+" />
                </assert_contents>
            </output>
            <output name="out_log">
                <assert_contents>
                    <has_n_lines n="8" />
                </assert_contents>
            </output>
        </test>
        <!--No annotation or log-->
        <test expect_num_outputs="1">
            <conditional name="pool">
                <param name="pool_mode" value="No"/>
                <param name="input" value="medaka_test.hdf"/>
            </conditional>
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history"/>
                <param name="ref_file" value="ref.fasta"/>
            </conditional>
            <conditional name="output_annotated">
                <param name="output_annotated_select" value="false"/>
            </conditional>
            <param name="output_log_bool" value="false"/>
            
            <output name="out_result">
                <assert_contents>
                    <has_n_lines n="9"/>
                    <has_line line="##fileformat=VCFv4.1" />
                    <has_line_matching expression="##medaka_version=[0-9]+\.[0-9]+\.[0-9]+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@WID@

The module *variant* decodes probabilities.

**Input**

- reference sequence (FASTA)
- (several) consensus files (H5/HDF)

**Output**

- decoded probabilities (VCF)

**References**

@REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>