Mercurial > repos > iuc > cnvkit_scatter

<macros>
    <token name="@VERSION_SUFFIX@">1</token>
    <token name="@TOOL_VERSION@">0.9.11</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">cnvkit</requirement>
            <requirement type="package" version="1.4.2">scikit-learn</requirement>
        </requirements>
    </xml>
    <xml name="reference_interface">
        <conditional name="reference_source">
            <param name="ref_selector" type="select" label="Choose the source for the reference genome">
                <option value="cached">Locally cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param argument="--fasta" optional="true" type="select" label="Reference genome">
                    <options from_data_table="fasta_indexes">
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param argument="--fasta" type="data" optional="true" format="fasta" label="Reference" help="Reference sequence" />
            </when>
        </conditional>
    </xml>
    <xml name="xrefs">
        <xrefs>
            <xref type="bio.tools">cnvkit</xref>
        </xrefs>
    </xml>
    <xml name="creators">
        <creator>
            <person givenName="Khaled" familyName="Jum'ah" url="https://github.com/khaled196" />
            <person givenName="Björn" familyName="Grüning" url="https://github.com/bgruening" />
            <person givenName="Katarzyna" familyName="Kamieniecka" url="https://github.com/kkamieniecka" />
            <person givenName="Krzysztof" familyName="Poterlowicz" url="https://github.com/poterlowicz-lab" />
            <organization name="poterlowicz-lab" url="https://github.com/poterlowicz-lab" />
        </creator>
    </xml>
    <xml name="creators_and_zahra">
        <creator>
            <person givenName="Khaled" familyName="Jum'ah" url="https://github.com/khaled196" />
            <person givenName="Björn" familyName="Grüning" url="https://github.com/bgruening" />
            <person givenName="Katarzyna" familyName="Kamieniecka" url="https://github.com/kkamieniecka" />
            <person givenName="zahra" familyName="Karimi" url="https://github.com/zahraK20" />
            <person givenName="Krzysztof" familyName="Poterlowicz" url="https://github.com/poterlowicz-lab" />
            <organization name="poterlowicz-lab" url="https://github.com/poterlowicz-lab" />
        </creator>
    </xml>
    <xml name="shared">
            <param argument="--method" type="select" label="Select the sequencing method of the input files" help="">
                <option value="hybrid" selected="True">hybridization capture </option>
                <option value="amplicon">targeted amplicon sequencing </option>
                <option value="wgs">whole genome sequencing </option>
            </param>
            <param argument="--segment-method" type="select" label="Method used in the 'segment' step" help="">
                <option value="cbs" selected="True">Circular Binary Segmentation CBS</option>
                <option value="flasso">Fused lasso, hybrid flasso</option>
                <option value="haar">a pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>
                <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>
                <option value="hmm">experimental – a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two thods., hybrid hmm</option>
                <option value="hmm-tumor">experimental – a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>
                <option value="hmm-germline">experimental – a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>
            </param>
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Use or assume a male reference" help="female samples will have +1 log-CNR of chrX; otherwise male samples would have -1 chrX" />
            <param argument="--countreads" type="boolean" checked="false" truevalue="--countreads" falsevalue="" label="Get read depths by counting read midpoints within each bin" help="" />
            <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples" />
    </xml>
    <xml name="create_CNV_reference_file">
        <param name="input_sample_file" type="data" format="bam" label="Sample BAM file" help="" />
        <param argument="--normal" type="data" format="bam" label="Control BAM file" help="" />
        <param argument="--targets" type="data" format="bed" label="Capture BED regions" help="" />
    </xml>
    <xml name="advanced_no_reference">
        <param argument="--antitargets" optional="true" type="data" format="bed,tabular" label="Antitarget intervals" help="BED or list" />
        <param argument="--annotate" optional="true" type="data" format="bed,gff,tabular" label="Use gene models from this file to assign names to the target regions" help="Format: UCSC refFlat.txt or ensFlat.txt file preferred, or BED, interval list, GFF, or similar" />
        <param argument="--short-names" type="boolean" checked="false" truevalue="--short-names" falsevalue="" label="Reduce multi-accession bait labels" help="" />
        <param argument="--target-avg-size" type="integer" optional="true" label="Average size of split target bins" min="1" value="" help="" />
        <param argument="--access" optional="true" type="data" format="bed" label="Regions of accessible sequence on chromosomes BED" help="" />
        <param argument="--antitarget-avg-size" optional="true" type="integer" label="Average size of antitarget bins" min="1" value="" help="" />
        <param argument="--antitarget-min-size" optional="true" type="integer" label="Minimum size of antitarget bins" min="1" value="" help="" />
        <param argument="--cluster" optional="true" type="boolean" checked="false" truevalue="--cluster" falsevalue="" label="Calculate and use cluster-specific summary stats in the reference pool" help="" />
    </xml>
    <xml name="reuse_an_existing_cnv_reference_file">
        <param name="input_sample_file" type="data" format="bam" label="Sample file" help="" />
        <param argument="--reference" type="data" format="tabular" label="CNV reference CNN File" help="" />
    </xml>
    <xml name="output_section">
        <section name="output_section" title="Outputs" expanded="false">
            <param argument="--scatter" type="boolean" checked="false" truevalue="--scatter" falsevalue="" label="Create a whole-genome copy ratio profile as a PNG scatter plot" help="" />
            <param argument="--diagram" type="boolean" checked="false" truevalue="--diagram" falsevalue="" label="Create an ideogram of copy ratios on chromosomes as a PDF" help="" />
        </section>
    </xml>
    <xml name="autobin_optional">
            <param argument="--method" type="select" label="Select the sequencing method of the input files" help="">
                <option value="hybrid" selected="True">hybridization capture </option>
                <option value="amplicon">targeted amplicon sequencing </option>
                <option value="wgs">whole genome sequencing </option>
            </param>
            <param argument="--access" optional="true" type="data" format="bed" label="Sequencing-accessible genomic regions, or exons to use as possible targets" help="The output of refFlat2bed.py" />
            <param argument="--bp-per-bin" optional="true" type="integer" label=" Desired average number of sequencing read bases mapped to each bin" min="1" value="100000" help="" />
            <param argument="--target-max-size" optional="true" type="integer" label="Maximum size of target bins" min="1" value="20000" help="" />
            <param argument="--target-min-size" optional="true" type="integer" label="Minimum size of target bins" min="1" value="20" help="" />
            <param argument="--antitarget-max-size" optional="true" type="integer" label="Maximum size of antitarget bins" min="1" value="500000" help="" />
            <param argument="--antitarget-min-size" optional="true" type="integer" label="Minimum size of antitarget bins" min="1" value="500" help="" />
            <param argument="--annotate" optional="true" type="data" format="bed,gff,tabular" label="Use gene models from this file to assign names to the target regions" help="Format: UCSC refFlat.txt or ensFlat.txt file preferred, or BED, interval list, GFF, or similar" />
            <param argument="--short-names" type="boolean" checked="false" truevalue="--short-names" falsevalue="" label="Reduce multi-accession bait labels to be short and consistent" help="" />
            <param argument="--target-output-bed" optional="true" type="data" format="bed" label="Filename for target BED output" help="If not specified, constructed from the input file basename" />
            <param argument="--antitarget-output-bed" optional="true" type="data" format="bed" label="Filename for antitarget BED output" help="If not specified, constructed from the input file basename" />
    </xml>
    <xml name="filter">
        <param argument="--filter" type="select" multiple="true" label="Merge segments flagged by the specified filter(s) with the adjacent segment(s)." help="">
            <option value="ampdel" selected="True">ampdel</option>
            <option value="cn">cn</option>
            <option value="ci">ci</option>
            <option value="sem">sem</option>
        </param>
    </xml>
    <xml name="sample_sex">
        <param argument="--sample-sex" type="select" label="Method used in the 'segment' step" help="">
            <option value="Male" selected="True">Male</option>
            <option value="Female">Female</option>
        </param>
    </xml>
    <xml name="call_optional">
            <param argument="--method" type="select" label="Select the sequencing method of the input files" help="">
                <option value="threshold" selected="True">hybridization capture </option>
                <option value="clonal">targeted amplicon sequencing </option>
                <option value="none">whole genome sequencing </option>
            </param>
            <param argument="--center" type="select" label="Method used in the 'segment' step" help="">
                <option value="mean">mean</option>
                <option value="median" selected="True">median</option>
                <option value="mode">mode</option>
                <option value="biweight">biweight</option>
            </param>
            <param argument="--center-at" optional="true" type="float" label="Subtract a constant number from all log2 ratios" value="" help="For manual re-centering, in case the --center option gives unsatisfactory results" />
            <param argument="--thresholds" optional="true" type="text" label="Hard thresholds for calling each integer copy number, separated by commas" value="=-1.1,-0.25,0.2,0.7" help="Apply cutoffs to either original or rescaled log2 values" />
            <param argument="--ploidy" optional="true" type="integer" label="Ploidy of the sample cells" min="1" max="2" value="2" help="" />
            <param argument="--purity" optional="true" type="float" label="Estimated tumor cell fraction, a.k.a. purity or cellularity" min="0" max="1" value="" help="" />
            <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples" />
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Use or assume a male reference" help="Was a male reference used? If so, expect half ploidy on chrX and chrY; otherwise, only chrY has half ploidy. In CNVkit, if a male reference was used, the neutral copy number ploidy of chrX is 1; chrY is haploid for either reference sex" />
    </xml>
    <xml name="additionally_SNP_process">
        <param argument="--vcf" optional="true" type="data" format="vcf" label="VCF file" help="VCF file name containing variants for calculation of b-allele frequencies" />
        <param argument="--sample-id" optional="true" type="text" label="Name of the sample in the VCF to use for b-allele frequency extraction" value="" help="" />
        <param argument="--normal-id" optional="true" type="text" label="Corresponding normal sample ID in the input VCF" value="" help="This sample is used to select only germline SNVs to calculate b-allele frequencies" />
        <param argument="--min-variant-depth" type="integer" min="1" value="20" optional="true" label="Minimum read depth for a SNV to be used in the b-allele frequency calculation" help="" />
        <param argument="--zygosity-freq" type="float" min="0" value="0.25" optional="true" label="Ignore VCF's genotypes and instead infer zygosity from allele frequencies" help="" />
    </xml>
    <xml name="diagram_optional">
            <param argument="--segment" optional="true" type="data" format="tabular" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
            <param argument="--threshold" optional="true" type="float" label="Threshold" min="0" value="0.5" help="Copy number change threshold to label genes" />
            <param argument="--min-probes" optional="true" type="integer" label="Minimum propes" min="1" value="3" help="Minimum number of covered probes to label a gene" />
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="MALE REFERENCE" help="Assume inputs were normalized to a male reference" />
            <param argument="--no-shift-xy" type="boolean" checked="false" truevalue="--no-shift-xy" falsevalue="" label="Don't adjust the X and Y chromosomes according to sample sex" help="" />
            <param argument="--chromosome" optional="true" type="text" label="Chromosome to display" value="" help="e.g. 'chr1' no chromosomal range allowed" />
    </xml>
    <xml name="diagram_plot">
        <param argument="--title" optional="true" type="text" label="Plot title" value="" help="" />
        <param argument="--no-gene-labels" type="boolean" checked="false" truevalue="--no-gene-labels" falsevalue="" label="Disable gene_name labels on plot useful when a lot of CNV were called" help="" />
    </xml>
    <xml name="heatmap_optional">
            <param argument="--by-bin" type="boolean" checked="false" truevalue="--by-bin" falsevalue="" label="Plot data x-coordinates by bin indices instead of genomic coordinates" help="" />
            <param argument="--chromosome" optional="true" type="text" label="Chromosome range" value="" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
            <param argument="--desaturate" type="boolean" checked="false" truevalue="--desaturate" falsevalue="" label="Tweak color saturation to focus on significant changes" help="" />
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="MALE REFERENCE" help="Assume inputs were normalized to a male reference" />
            <param argument="--no-shift-xy" type="boolean" checked="false" truevalue="--no-shift-xy" falsevalue="" label="Don't adjust the X and Y chromosomes according to sample sex" help="" />
            <param argument="--vertical" type="boolean" checked="false" truevalue="--vertical" falsevalue="" label="Plot heatmap with samples as X-axis instead of Y-axis" help="" />
            <param argument="--delimit-samples" type="boolean" checked="false" truevalue="--delimit-samples" falsevalue="" label="Add an horizontal delimitation line between each sample" help="" />
            <param argument="--title" optional="true" type="text" label="Plot title" value="" help="" />
    </xml>
    <xml name="reference_optional">
            <param argument="--cluster" type="boolean" checked="false" truevalue="--cluster" falsevalue="" label="Calculate and store summary stats for clustered subsets of the normal samples with similar coverage profiles" help="" />
            <param argument="--min-cluster-size" optional="true" type="integer" label="Minimum cluster size to keep in reference profiles" min="1" value="4" help="" />
            <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Create a male reference" help="shift female samples' chrX log-coverage by -1, so the reference chrX average is -1. Otherwise, shift male samples' chrX by +1, so the reference chrX average is 0" />
    </xml>
    <xml name="construct_CNV_ref_with_natural_expected_number">
        <param argument="--targets" optional="true" type="data" format="bed" label="Target intervals bed file" help="" />
        <param argument="--antitargets" optional="true" type="data" format="bed" label="Antitarget intervals bed file" help="" />
    </xml>
    <xml name="disable_specific_automatic_bias_corrections">
        <param argument="--no-gc" type="boolean" checked="false" truevalue="--no-gc" falsevalue="" label="Skip GC correction" help="" />
        <param argument="--no-edge" type="boolean" checked="false" truevalue="--no-edge" falsevalue="" label="skip edge-effect correction" help="" />
        <param argument="--no-rmask" type="boolean" checked="false" truevalue="--no-rmask" falsevalue="" label="skip repeat master correction" help="" />
    </xml>
    <xml name="scatter_optional">
            <param argument="--segment" optional="true" type="data" format="tabular" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
            <param argument="--chromosome" optional="true" type="text" label="Chromosome range" value="" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
            <param argument="--gene" optional="true" type="text" label="Name of gene or genes comma-separated to display" value="" help="" />
            <param argument="--range-list" optional="true" type="data" format="bed" label="Range list" help="File listing the chromosomal ranges to display, as BED"/>
            <param argument="--width" optional="true" type="integer" label="Width" min="1" value="1000000" help="Width of margin to show around the selected genes or small chromosomal region" />
    </xml>
    <xml name="scatter_plot">
        <param argument="--antitarget-marker" optional="true" type="text" label="Antitarget marker" value="same as targets" help="Plot antitargets using this symbol when plotting in a selected chromosomal region"/>
        <param argument="--by-bin" type="boolean" checked="false" truevalue="--by-bin" falsevalue="" label="Plot data x-coordinates by bin indices instead of genomic coordinates" help=""/>
        <param argument="--segment-color" optional="true" type="text" label="Segment color" value="red" help=""/>
        <param argument="--title" optional="true" type="text" label="Plot title" value="" help=""/>
        <param argument="--trend" type="boolean" checked="false" truevalue="--trend" falsevalue="" label="Draw a smoothed local trendline on the scatter plot" help=""/>
        <param argument="--y-max" optional="true" type="integer" label="y-axis upper limit" min="1" value="" help=""/>
        <param argument="--y-min" optional="true" type="integer" label="y-axis lower limit" min="1" value="" help=""/>
        <param argument="--fig-size" optional="true" type="float" label="Width and height of the plot in inches" value="" help="Example 6.4 4.8, the space between the two inputs is important"/>
    </xml>
    <xml name="segment_optional">
            <param argument="--dataframe" type="text" optional="true" label="Data frame" value="" help="File name to save the raw R dataframe emitted by CBS or Fused Lasso, example dataframe.r"/>
            <param argument="--method" type="select" label="Segmentation method" help="">
                <option value="cbs" selected="True">Circular Binary Segmentation CBS method,hybrid CBS</option>
                <option value="flasso">Fused lasso, hybrid flasso</option>
                <option value="haar">A pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>
                <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>
                <option value="hmm">experimental – a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two methods., hybrid hmm</option>
                <option value="hmm-tumor">experimental – a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>
                <option value="hmm-germline">experimental – a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>
            </param>
            <param argument="--threshold" optional="true" type="integer" label="Significance threshold" min="1" help="To accept breakpoints during segmentation. For HMM methods, this is the smoothing window size"/>
            <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples"/>
            <param argument="--drop-outliers" optional="true" type="integer" label="Drop outliers" min="1" value="10" help=""/>
            <param argument="--smooth-cbs" type="boolean" checked="false" truevalue="--smooth-cbs" falsevalue="" label="Perform an additional smoothing before CBS segmentations" help=""/>
    </xml>
    <xml name="sample_sex_condition">
        <conditional name="Sample_sex">
            <param name="sex" type="select" label="Sample sex availabel" help="">
                <option value="no" selected="True">Sample sex unknown</option>
                <option value="yes">Select sample sex</option>
            </param>
            <when value="yes">
                <expand macro="sample_sex" />
            </when>
            <when value="no">
            </when>
        </conditional>
    </xml>
    <xml name="genemetrics_segmetrics_statistics">
          <param argument="--mean" type="boolean" checked="true" truevalue="--mean" falsevalue="" label="Mean log2-ratio" help="" />
          <param argument="--median" type="boolean" checked="false" truevalue="--median" falsevalue="" label="Median" help="" />
          <param argument="--mode" type="boolean" checked="false" truevalue="--mode" falsevalue="" label="mode" help="peak density of log2 ratios" />
          <param argument="--ttest" type="boolean" checked="false" truevalue="--ttest" falsevalue="" label="One-sample t-test" help="bin log2 ratios versus 0.0" />
          <param argument="--stdev" type="boolean" checked="false" truevalue="--stdev" falsevalue="" label="Standard deviation" help="" />
          <param argument="--sem" type="boolean" checked="false" truevalue="--sem" falsevalue="" label="Standard error of the mean" help="" />
          <param argument="--mad" type="boolean" checked="false" truevalue="--mad" falsevalue="" label="Median absolute deviation" help="" />
          <param argument="--mse" type="boolean" checked="false" truevalue="--mse" falsevalue="" label="mean squared error" help="" />
          <param argument="--iqr" type="boolean" checked="false" truevalue="--iqr" falsevalue="" label="Inter-quartile range" help="" />
          <param argument="--bivar" type="boolean" checked="false" truevalue="--bivar" falsevalue="" label="Tukeys biweight midvariance" help="" />
          <param argument="--ci" type="boolean" checked="false" truevalue="--ci" falsevalue="" label="confidence interval" help="" />
          <param argument="--pi" type="boolean" checked="false" truevalue="--pi" falsevalue="" label="prediction interval" help="" />
          <param argument="--alpha" type="boolean" checked="false" truevalue="--alpha" falsevalue="" label="alpha" help="" />
          <param argument="--bootstrap" type="boolean" checked="false" truevalue="--bootstrap" falsevalue="" label="estimate confidence interval" help="number of bootstrp interations" />
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1371/journal.pcbi.1004873</citation>
        </citations>
    </xml>
</macros>
author	iuc
date	Fri, 13 Dec 2024 23:59:03 +0000
parents	76f9a071da54
children