view bcftools_cnv.xml @ 18:0238f68ad641 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bcftools commit cdb4558db96dd58f9f85267488a1e27ad813fca4
author iuc
date Thu, 29 Sep 2022 09:11:06 +0000
parents 3a7084316a6e
children 143796b504eb
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- One-line summary shown next to the tool name. -->
    <description>Call copy number variation from VCF B-allele frequency (BAF) and Log R Ratio intensity (LRR) values</description>
    <!-- Local token naming the bcftools subcommand (used in the tool name/id, the
         command line and the help text) plus the shared macro definitions imported
         from macros.xml, which is not part of this file. -->
    <macros>
        <token name="@EXECUTABLE@">cnv</token>
        <import>macros.xml</import>
    </macros>
    <!-- The macros expanded below are defined outside this file; the requirements
         expansion is additionally handed the matplotlib requirement macro as nested
         content. -->
    <expand macro="bio_tools" />
    <expand macro="requirements">
        <expand macro="matplotlib_requirement" />
    </expand>
    <expand macro="version_command" />
    <command detect_errors="aggressive"><![CDATA[
## Builds the bcftools cnv call and post-processes its output directory.
## The upper-case names enclosed in at-signs are macro tokens whose expansions
## are defined outside this file.
@PREPARE_ENV@
@PREPARE_INPUT_FILE@
## presumably the targets/regions preparation tokens below read the section variable
#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## bcftools writes into this scratch directory; the result files are moved or
## embedded into the Galaxy datasets at the end of the command
#set $output_dir = 'cnv_tmp'
--output-dir $output_dir

## General section
## sample names are stripped so that blank input counts as "not given"
#set $query_sample = str($query_sample).strip()
#set $control_sample = str($control_sample).strip()
#if $control_sample:
  -c '$control_sample'
#end if
#if $query_sample:
  -s '$query_sample'
#end if
@AF_FILE@
#if str($plotting.generate_plots) != 'none':
  -p ${plotting.plot_threshold}
#end if

## HMM section
#set $section = $sec_hmm
--aberrant "${section.aberrant_query},${section.aberrant_control}"
#if str($section.aberrant_optimization.do_optimize) == 'yes':
  --optimize ${section.aberrant_optimization.lower_bound}
#end if
--BAF-weight ${section.score_usage.baf_weight}
--BAF-dev "${section.score_usage.baf_dev_query},${section.score_usage.baf_dev_control}"
--LRR-weight ${section.score_usage.lrr_weight}
#if str($section.score_usage.compute_on) == 'baf+lrr':
  --LRR-dev "${section.score_usage.lrr_dev_query},${section.score_usage.lrr_dev_control}"
  --LRR-smooth-win ${section.score_usage.lrr_smooth_win}
#end if
## The probabilities are tested via their string form: 0 is a valid setting
## (e.g. same-prob 0 calls both samples independently) but would be falsy if
## the parameter were tested directly, silently dropping the option
#if str($section.same_prob) != '':
  --same-prob ${section.same_prob}
#end if
#if str($section.err_prob) != '':
  --err-prob ${section.err_prob}
#end if
#if str($section.xy_prob) != '':
  --xy-prob ${section.xy_prob}
#end if

## Filter section
#set $section = $sec_restrict
@REGIONS@
@TARGETS@

## Primary Input/Outputs
@INPUT_FILE@

## NOTE(review): the cn.*.tab glob assumes a single matching file in the
## output directory - confirm this also holds for pairwise runs with
## distinct query and control sample names
&& mv ${output_dir}/cn.*.tab '$output_cn'
## the summary file name differs between pairwise and single-sample mode
#if $control_sample:
  && mv ${output_dir}/summary.tab '$output_summary'
#else:
  && mv ${output_dir}/summary.*.tab '$output_summary'
#end if

#if str($plotting.generate_plots) != 'none':
  ## collect all generated plots and embed them in html as base-64 encoded data
  && (echo '<html><head><title>Copy-number variation plots (bcftools cnv)</title><style type="text/css">
  @media print {
    img {
      max-width:100% !important;
      page-break-inside: avoid;
    }
  }
  </style></head><body>' > '$output_plots';
  for plot in ${output_dir}/*.png; do
    [ -f "\$plot" ] || break;
    echo '<div><img src="data:image/png;base64,' >> '$output_plots';
    python -m base64 "\$plot" >> '$output_plots';
    echo '" /></div><hr>' >> '$output_plots';
  done;
  echo '</body></html>' >> '$output_plots';)
#end if

]]>
    </command>
    <inputs>
        <expand macro="macro_input" />
        <!-- Optional sample names; the command template strips them and passes -s / -c only when non-empty. -->
        <param argument="-s" name="query_sample" type="text" label="Query Sample" optional="True" help="The name (as used in the input) of the query sample in the input. Can be omitted if, and only if, there is only one sample in the input." />
        <param argument="-c" name="control_sample" type="text" label="Control Sample" optional="True" help="The name (as used in the input) of an optional control sample to compare against. Note: The pairwise calling mode represents the real strength of the tool as it helps to reduce the number of false calls and also allows one to distinguish between normal and novel copy number variation." />
        <expand macro="macro_AF_file" />
        <!-- Plot selector: the chosen branch supplies the plot_threshold value that the
             command template passes via -p whenever generate_plots is not 'none'. -->
        <conditional name="plotting">
            <param name="generate_plots"
                   type="select"
                   label="Plot results?">
                <option value="all">Yes, plot results for all chromosomes</option>
                <option value="some">Yes, but plot only chromosomes with above-threshold quality of CNV status estimate</option>
                <option value="none">No, do not generate plots</option>
            </param>
            <!-- plot everything: threshold fixed at 0 -->
            <when value="all">
                <param name="plot_threshold" type="hidden" value="0"/>
            </when>
            <!-- user-chosen threshold -->
            <when value="some">
                <param argument="-p" name="plot_threshold" type="float" value="0" label="Plot Threshold" help="Plot aberrant chromosomes with quality at least 'float'"/>
            </when>
            <!-- no plots: this branch defines no threshold parameter -->
            <when value="none"/>
        </conditional>
        <!-- Region/target restriction inputs; the command template assigns this section to
             its section variable before the regions/targets tokens. The macro_restrict
             macro itself is defined outside this file. -->
        <section name="sec_restrict"
                 title="Restrict to"
                 expanded="false">
            <expand macro="macro_restrict"/>
            <expand macro="macro_restrict" type="target" label_type="Target"/>
        </section>
        <!-- HMM tuning parameters. The command template reads them via the sec_hmm section:
             the two purity values are joined into one aberrant argument, the BAF/LRR
             deviations into one argument each, and the three probabilities are passed
             individually. -->
        <section name="sec_hmm" expanded="false" title="HMM Options">
            <param argument="--aberrant" name="aberrant_query" type="float" value="1" min="0" max="1"
            label="Estimated purity of the query sample"
            help="Estimate of the fractional contribution of the desired query tissue to the (possibly control-contaminated) query sample (default: 1 = no contamination with control tissue)" />
            <param argument="--aberrant" name="aberrant_control" type="float" value="1" min="0" max="1"
            label="Estimated purity of the control sample"
            help="Estimate of the fractional contribution of the desired control tissue to the (possibly query-contaminated) control sample (default: 1 = no contamination with query tissue)" />
            <!-- Optional optimization of the purity estimates; only the 'yes' branch carries a parameter. -->
            <conditional name="aberrant_optimization">
                <param name="do_optimize" type="select"
                label="Adjust sample purity estimates based on data?"
                help="Instead of treating your specified estimates of the sample purities as fixed values, the tool can use them as starting values for an iterative optimization that tries to estimate the sample purities from the data. Note: With estimate optimization enabled the final estimates will be reported as cell fraction (CF) estimates in the summary report.">
                    <option value="no">No, leave sample purities as specified</option>
                    <option value="yes">Yes, adjust purity estimates</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param argument="--optimize" name="lower_bound" type="float" value="0.3" min="1e-9" max="1"
                    label="Lower bound for adjusted estimate"
                    help="Constrains the final adjusted purity estimates not to be smaller than this value." />
                </when>
            </conditional>
            <!-- Both branches define baf_weight, lrr_weight and the BAF deviations because the
                 command template always emits them; the LRR deviation and smoothing options
                 exist only in the 'baf+lrr' branch. -->
            <conditional name="score_usage">
                <param name="compute_on" type="select" label="Use BAF and LRR annotations to call CNVs?"
                help="Using LRR information in addition to BAF values is the default and helps in dealing with random noise in the data. However, the tool is also able to call CNVs from BAF values alone in case your input does not feature LRR information.">
                    <option value="baf+lrr">Yes (requires input with both BAF and LRR subfields)</option>
                    <option value="baf">No, use BAF values exclusively</option>
                </param>
                <when value="baf+lrr">
                    <param argument="--BAF-weight" name="baf_weight" type="float" value="1" min="0" max="1"
                    label="Baf Weight" help="relative contribution from BAF" />

                    <param argument="--BAF-dev" name="baf_dev_query" type="float" value="0.04" min="0" max="1"
                    label="Query sample BAF deviation"
                    help="Expected BAF deviation in the query sample (default: 0.04)" />
                    <param argument="--BAF-dev" name="baf_dev_control" type="float" value="0.04" min="0" max="1"
                    label="Control sample BAF deviation"
                    help="Expected BAF deviation in the control sample (default: 0.04)" />

                    <param argument="--LRR-weight" name="lrr_weight" type="float" value="0.2" min="0" max="1"
                    label="LRR Weight"
                    help="Relative contribution from LRR. This option can have a big effect on the number of calls produced. With truly random noise (such as in simulated data), the value should be set high (1.0), but in the presence of systematic noise when LRR values are not informative, lower values result in cleaner calls (default: 0.2)." />
                    <param argument="--LRR-dev" name="lrr_dev_query" type="float" value="0.2" min="0" max="1"
                    label="Query sample LRR Deviation"
                    help="Expected LRR deviation in the query sample (default: 0.2)" />
                    <param argument="--LRR-dev" name="lrr_dev_control" type="float" value="0.2" min="0" max="1"
                    label="Control sample LRR Deviation"
                    help="Expected LRR deviation in the control sample (default: 0.2)" />
                    <param argument="--LRR-smooth-win" name="lrr_smooth_win" type="integer" value="10"
                    label="LRR Smoothing Window"
                    help="Window of LRR moving average smoothing (default: 10)" />
                </when>
                <when value="baf">
                    <param argument="--BAF-dev" name="baf_dev_query" type="float" value="0.04" min="0" max="1"
                    label="Query sample BAF deviation"
                    help="Expected BAF deviation in the query sample (default: 0.04)" />
                    <param argument="--BAF-dev" name="baf_dev_control" type="float" value="0.04" min="0" max="1"
                    label="Control sample BAF deviation"
                    help="Expected BAF deviation in the control sample (default: 0.04)" />
                    <!-- fixed weights for BAF-only calling: BAF fully weighted, LRR switched off -->
                    <param name="baf_weight" type="hidden" value="1" />
                    <param name="lrr_weight" type="hidden" value="0" />
                </when>
            </conditional>
            <!-- err_prob is a probability, so it is bounded to [0, 1] like same_prob and xy_prob -->
            <param argument="--err-prob" name="err_prob" type="float" value="1e-4" min="0" max="1" label="Err Prob" help="Uniform error probability" />
            <param argument="--same-prob" name="same_prob" type="float" value="0.5" min="0" max="1" label="Same Prob">
                <help>
                    The prior probability of the query and the control sample being the same. 
                    Setting to 0 calls both independently, setting to 1 forces the same copy number state in both. (default: 0.5)
                </help>
            </param>
            <param argument="--xy-prob" name="xy_prob" type="float" min="0." max="1." label="Xy Prob" value="1e-9">
                <help>
                    The HMM probability of transition to another copy number state. 
                    Increasing this value leads to smaller and more frequent calls. 
                </help>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each label is the input dataset name with its last dot-suffix removed,
             followed by a suffix naming the kind of output. -->
        <data name="output_cn"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.cn" />
        <data name="output_summary"
              format="tabular"
              label="${input_file.name.rsplit('.',1)[0]}.summary" />
        <!-- HTML report with the embedded plots; only created when plotting is enabled. -->
        <data name="output_plots"
              format="html"
              label="${input_file.name.rsplit('.',1)[0]}.plots">
            <filter>plotting['generate_plots'] != 'none'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Defaults: single-sample calling with plots (three outputs) -->
        <test expect_num_outputs="3">
            <param name="input_file" ftype="vcf" value="cnv.vcf" />
            <output name="output_summary" file="cnv_summary.tab" compare="re_match" />
        </test>
        <!-- Plotting disabled: the plots dataset is filtered out -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="cnv.vcf" />
            <conditional name="plotting">
                <param name="generate_plots" value="none" />
            </conditional>
            <output name="output_summary" file="cnv_summary.tab" compare="re_match" />
        </test>
        <!-- Pairwise mode: query and control sample both given -->
        <test expect_num_outputs="3">
            <param name="input_file" ftype="vcf" value="cnv.vcf" />
            <param name="query_sample" value="test" />
            <param name="control_sample" value="test" />
            <output name="output_summary" file="cnv_pairwise_summary.tab" compare="re_match" />
        </test>
        <!-- BAF-only calling; score_usage lives inside the sec_hmm section, so it is
             addressed through that section to give an unambiguous parameter path -->
        <test expect_num_outputs="2">
            <param name="input_file" ftype="vcf" value="cnv_baf_only.vcf" />
            <conditional name="plotting">
                <param name="generate_plots" value="none" />
            </conditional>
            <section name="sec_hmm">
                <conditional name="score_usage">
                    <param name="compute_on" value="baf" />
                </conditional>
            </section>
            <output name="output_summary" file="cnv_summary.tab" compare="re_match" />
        </test>
        <!-- Test region overlap option -->
        <test expect_num_outputs="3">
            <param name="input_file" ftype="vcf" value="cnv.vcf" />
            <section name="sec_restrict">
                <param name="regions_overlap" value="1"/>
            </section>
            <output name="output_summary" file="cnv_summary.tab" compare="re_match" />
            <assert_command>
                <has_text text="--regions-overlap" />
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Copy number variation caller that uses Illumina's B-allele frequency (BAF) and, by default, Log R Ratio intensity (LRR) annotations; CNVs can also be called from BAF values alone.
The HMM considers the following copy number states: CN 2 (normal), 1 (single-copy loss), 0 (complete loss), 3 (single-copy gain).

@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_WIKI@
]]>
    </help>
    <expand macro="citations" />
</tool>