Mercurial > repos > iuc > jasminesv

<?xml version="1.0"?>
<tool id="jasminesv" name="JasmineSV" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Merge structural variants across samples</description>
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="version_command"/>

    <!--
        Jasmine command line, rendered as a Cheetah template.
        Valued options are passed as quoted key=value pairs; switches are passed
        as bare flags whose text is the truevalue / option value of the parameter.
    -->
    <command detect_errors="exit_code"><![CDATA[
    ## Reference preparation is only emitted when duplications are converted to
    ## insertions. NOTE(review): the REF_FASTA token comes from macros.xml and
    ## presumably provides the genome under the name "reference" - confirm there.
    #if $dup_to_ins.dup_to_ins:
        @REF_FASTA@
    #end if

    jasmine
    ## Optional params (key=value)
    'max_dist=${max_dist}'
    ## max_dist_linear is only passed when it is non-zero
    #if float($max_dist_linear) != 0.0:
        'max_dist_linear=${max_dist_linear}'
    #end if
    'min_dist=${min_dist}'
    'kd_tree_norm=${kd_tree_norm}'
    'min_seq_id=${min_seq_id}'
    'k_jaccard=${k_jaccard}'
    'max_dup_length=${max_dup_length}'
    'min_support=${min_support}'
    ## Escaped so the shell (not Cheetah) expands GALAXY_SLOTS at run time
    threads=\${GALAXY_SLOTS:-4}
    'spec_reads=${spec_reads}'
    'spec_len=${spec_len}'
    #if $dup_to_ins.dup_to_ins:
        'genome_file=reference'
    #end if

    ## Flags: left unquoted so that an unset flag (empty value) adds nothing to
    ## the command line instead of an empty '' argument. All values are fixed
    ## strings defined in the inputs section, never free user text.
    ${ignore_strand}
    ${ignore_type}
    ## Use the select value inside the conditional, not the conditional itself
    #if $dup_to_ins.dup_to_ins:
        ${dup_to_ins.dup_to_ins}
    #end if
    ${mark_specific}
    ${pre_normalize}
    ${use_edit_dist}
    ${preprocess_only}
    ${postprocess_only}
    ${keep_var_ids}
    ${use_end}
    ${output_genotypes}
    ${ignore_merged_inputs}
    ${centroid_merging}
    ${clique_merging}
    ${allow_intrasample}
    ${normalize_type}
    ${leave_breakpoints}
    ${require_first_sample}

    ${normalize.normalize_chrs}
    ## The mapping file is optional even when normalization is switched on
    #if $normalize.normalize_chrs and $normalize.chr_norm_file:
        'chr_norm_file=${normalize.chr_norm_file}'
    #end if

    ## Required args
    file_list='${vcffilelist}'
    out_file='${out_vcf}'
    ]]></command>
    <configfiles>
        <!--
            vcffilelist: generated text file with one line per dataset selected in
            the vcf_list input; the command section passes it to jasmine as file_list.
            The body below is Cheetah template text, so it is deliberately not indented.
            NOTE(review): the lone "#" right after the opening tag and the indented
            closing tag are part of the template text; confirm how Cheetah renders
            them and that jasmine tolerates the result.
        -->
        <configfile name="vcffilelist">#
#for $vcf_file in $vcf_list:
${vcf_file}
#end for
        </configfile>
    </configfiles>
    <inputs>
        <!--
            TODO (possibly in a future version): add IrisSV support for post-processing.
            For now IrisSV is only made accessible as a separate tool that users run independently.
        -->
        <!--
            Input files
        -->
        <param name="vcf_list" type="data" multiple="true" format="vcf" label="VCF file(s) to merge"/>
        <!--
            Params: passed to jasmine as key=value pairs
        -->
        <param argument="max_dist" type="integer" value="1000" min="0" label="The maximum distance variants can be apart when being merged" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
        <param argument="min_dist" type="integer" value="-1" min="-1" label="The minimum distance threshold a variant can have when using max_dist_linear" />
        <param argument="max_dist_linear" type="float" value="0." min="0.0" label="Make max_dist this proportion of the length of each variant" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
        <param argument="kd_tree_norm" type="integer" value="2" min="1" label="The power to use in kd-tree distances (1 is Manhattan, 2 is Euclidean, etc.)" />
        <param argument="min_seq_id" type="float" value="0." min="0." label="The minimum sequence identity for two insertions to be merged" />
        <param argument="k_jaccard" type="integer" value="9" min="1" label="The kmer size to use when computing Jaccard similarity of insertions" />
        <param argument="max_dup_length" type="integer" value="10000" min="0" label="The maximum length of duplication that can be converted to an insertion" />
        <param argument="min_support" type="integer" value="1" min="1" label="The minimum number of callsets a variant must be in to be output" />
        <param argument="spec_reads" type="integer" value="10" min="1" label="The minimum number of reads a variant needs to be in the specific callset" />
        <param argument="spec_len" type="integer" value="30" min="1" label="The minimum length a variant needs to be in the specific callset" />
        <!--
            Flags: the truevalue is the literal switch placed on the command line
        -->
        <param argument="--ignore_strand" type="boolean" checked="false" truevalue="--ignore_strand" falsevalue="" label="Allow variants with different strands to be merged" />
        <param argument="--ignore_type" type="boolean" checked="false" truevalue="--ignore_type" falsevalue="" label="Allow variants with different types to be merged" />
        <!-- A select (not a boolean) so that the reference inputs only appear when conversion is requested -->
        <conditional name="dup_to_ins">
            <param argument="--dup_to_ins" type="select" label="Convert duplications to insertions for SV merging and then convert them back?" help="Requires reference genome" >
                <option value="--dup_to_ins">Convert duplications to insertions for SV merging and then convert them back</option>
                <option value="" selected="true">Don't convert duplications to insertions for SV merging</option>
            </param>
            <when value="--dup_to_ins">
                <expand macro="reference"/>
            </when>
            <when value=""/>
        </conditional>
        <param argument="--mark_specific" type="boolean" checked="false" truevalue="--mark_specific" falsevalue="" label="Mark calls in the original VCF files that have enough support to be called specific" />
        <param argument="--pre_normalize" type="boolean" checked="false" truevalue="--pre_normalize" falsevalue="" label="Run type normalization before merging" />
        <param argument="--use_edit_dist" type="boolean" checked="false" truevalue="--use_edit_dist" falsevalue="" label="Use edit distance for comparing insertion sequences instead of Jaccard" />
        <param argument="--preprocess_only" type="boolean" checked="false" truevalue="--preprocess_only" falsevalue="" label="Only run the preprocessing and not the actual merging or post-processing" />
        <param argument="--postprocess_only" type="boolean" checked="false" truevalue="--postprocess_only" falsevalue="" label="Only run the postprocessing and not the actual merging or pre-processing" />
        <param argument="--keep_var_ids" type="boolean" checked="false" truevalue="--keep_var_ids" falsevalue="" label="Don't change variant IDs (should only be used if input IDs are unique across samples)" />
        <param argument="--use_end" type="boolean" checked="false" truevalue="--use_end" falsevalue="" label="Use the end coordinate as the second coordinate instead of the variant length" />
        <param argument="--output_genotypes" type="boolean" checked="false" truevalue="--output_genotypes" falsevalue="" label="Print the genotypes of the consensus variants in all of the samples they came from" />
        <param argument="--ignore_merged_inputs" type="boolean" checked="false" truevalue="--ignore_merged_inputs" falsevalue="" label="Ignore merging info such as support vectors which is already present in the inputs" />
        <param argument="--centroid_merging" type="boolean" checked="false" truevalue="--centroid_merging" falsevalue="" label="Require every group to have a centroid which is within the distance threshold of each variant" />
        <param argument="--clique_merging" type="boolean" checked="false" truevalue="--clique_merging" falsevalue="" label="Require every group to have each pair within it be mergeable" />
        <param argument="--allow_intrasample" type="boolean" checked="false" truevalue="--allow_intrasample" falsevalue="" label="Allow variants in the same sample to be merged" />
        <param argument="--normalize_type" type="boolean" checked="false" truevalue="--normalize_type" falsevalue="" label="Convert all variants to INS/DEL/DUP/INV/TRA" />
        <param argument="--leave_breakpoints" type="boolean" checked="false" truevalue="--leave_breakpoints" falsevalue="" label="Leave breakpoints as they are even if they are inconsistent" />
        <param argument="--require_first_sample" type="boolean" checked="false" truevalue="--require_first_sample" falsevalue="" label="Only output merged variants which include a variant from the first sample" />
        <!-- A select (not a boolean) so that the optional mapping file only appears when normalization is requested -->
        <conditional name="normalize">
            <param argument="--normalize_chrs" type="select" label="Whether to normalize chromosome names" help="(to NCBI standards - without 'chr' - by default)">
                <option value="--normalize_chrs">Normalize chromosome names</option>
                <option value="" selected="true">Don't normalize chromosome names</option>
            </param>
            <when value="--normalize_chrs">
                <param name="chr_norm_file" type="data" format="txt,tsv" label="A file containing chromosome name mappings" optional="true"/>
            </when>
            <when value=""/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single output: the VCF that the command section hands to jasmine as out_file -->
        <data name="out_vcf"
              format="vcf"
              label="${tool.name} on ${on_string}: Result"/>
    </outputs>
    <tests>
        <!-- #1: defaults only -->
        <test expect_num_outputs="1">
            <param name="vcf_list" ftype="vcf" value="a.vcf,b.vcf"/>
            <output name="out_vcf" file="out1.vcf"/>
        </test>
        <!-- #2: chromosome name normalization with a mapping file -->
        <test expect_num_outputs="1">
            <param name="vcf_list" ftype="vcf" value="c.vcf,d.vcf"/>
            <conditional name="normalize">
                <param name="normalize_chrs" value="--normalize_chrs"/>
                <param name="chr_norm_file" value="chr_norm_file.txt"/>
            </conditional>
            <output name="out_vcf" file="out1.vcf"/>
        </test>
        <!-- #3: duplication to insertion conversion, reference genome taken from the history -->
        <test expect_num_outputs="1">
            <param name="vcf_list" ftype="vcf" value="a.vcf,b.vcf"/>
            <conditional name="dup_to_ins">
                <param name="dup_to_ins" value="--dup_to_ins"/>
                <conditional name="reference_source">
                    <param name="reference_source_selector" value="history"/>
                    <param name="ref_file" value="genome.fa.gz" ftype="fasta.gz"/>
                </conditional>
            </conditional>
            <output name="out_vcf" file="out1.vcf"/>
        </test>
        <!-- #4: duplication to insertion conversion, cached reference genome -->
        <test expect_num_outputs="1">
            <param name="vcf_list" ftype="vcf" value="a.vcf,b.vcf"/>
            <conditional name="dup_to_ins">
                <param name="dup_to_ins" value="--dup_to_ins"/>
                <conditional name="reference_source">
                    <param name="reference_source_selector" value="cached"/>
                    <param name="ref_file" value="jasmine"/>
                </conditional>
            </conditional>
            <output name="out_vcf" file="out1.vcf"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@WID@

**Input**

- Multiple VCF files to be merged

**Output**

- Merged Variants (VCF)

**References**

@REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Thu, 29 Apr 2021 12:24:00 +0000
parents	630e2929a131
children