Galaxy |

Changeset 1:5b8288d21d5e (2025-03-01)

Previous changeset 0:9ca92159a21a (2025-01-20)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cnvkit commit fc1282ec68b346988203ead860e9b9d6a47e9efb

modified:
macros.xml
nexus_ogt.xml
test-data/sample.cnv.vcf

diff -r 9ca92159a21a -r 5b8288d21d5e macros.xml
--- a/macros.xml Mon Jan 20 16:41:15 2025 +0000
+++ b/macros.xml Sat Mar 01 12:04:23 2025 +0000

b'@@ -1,10 +1,10 @@\n <macros>\n- <token name="@VERSION_SUFFIX@">1</token>\n- <token name="@TOOL_VERSION@">0.9.11</token>\n+ <token name="@VERSION_SUFFIX@">0</token>\n+ <token name="@TOOL_VERSION@">0.9.12</token>\n <xml name="requirements">\n <requirements>\n <requirement type="package" version="@TOOL_VERSION@">cnvkit</requirement>\n- <requirement type="package" version="1.4.2">scikit-learn</requirement>\n+ <requirement type="package" version="1.21">samtools</requirement>\n </requirements>\n </xml>\n <xml name="reference_interface">\n@@ -56,13 +56,13 @@\n <option value="wgs">whole genome sequencing </option>\n </param>\n <param argument="--segment-method" type="select" label="Method used in the \'segment\' step" help="">\n- <option value="cbs" selected="True">Circular Binary Segmentation CBS</option>\n- <option value="flasso">Fused lasso, hybrid flasso</option>\n- <option value="haar">a pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>\n- <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>\n- <option value="hmm">experimental \xe2\x80\x93 a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two thods., hybrid hmm</option>\n- <option value="hmm-tumor">experimental \xe2\x80\x93 a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. 
Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>\n- <option value="hmm-germline">experimental \xe2\x80\x93 a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>\n+ <option value="cbs" selected="True">CBS: Circular Binary Segmentation (default, precise)</option>\n+ <option value="flasso">Flasso: Fused Lasso; smoother segments, fewer breakpoints</option>\n+ <option value="haar">Haar: Haar wavelet transform; detects abrupt changes</option>\n+ <option value="none">None: No segmentation; outputs bin-level data as segments</option>\n+ <option value="hmm">Hmm: Basic Hidden Markov Model (generic use)</option>\n+ <option value="hmm-tumor">Hmm-tumor: HMM tailored for tumor samples (somatic CNVs)</option>\n+ <option value="hmm-germline">Hmm-germline: HMM for germline (inherited) variants (diploid assumption)</option>\n </param>\n <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Use or assume a male reference" help="female samples will have +1 log-CNR of chrX; otherwise male samples would have -1 chrX" />\n <param argument="--countreads" type="boolean" checked="false" truevalue="--countreads" falsevalue="" label="Get read depths by counting read midpoints within each bin" help="" />\n@@ -70,7 +70,7 @@\n </xml>\n <xml name="create_CNV_reference_file">\n <param name="input_sample_file" type="data" format="bam" label="Sample BAM file" help="" />\n- <param argument="--normal" type="data" format="bam" label="Control BAM file" help="" />\n+ <param argument="--normal" o'..b'nt="--trend" type="boolean" checked="false" truevalue="--trend" falsevalue="" label="Draw a smoothed local trendline on the scatter plot" help=""/>\n <param 
argument="--y-max" optional="true" type="integer" label="y-axis upper limit" min="1" value="" help=""/>\n <param argument="--y-min" optional="true" type="integer" label="y-axis lower limit" min="1" value="" help=""/>\n <param argument="--fig-size" optional="true" type="float" label="Width and height of the plot in inches" value="" help="Example 6.4 4.8, the space between the two inputs is important"/>\n </xml>\n <xml name="segment_optional">\n- <param argument="--dataframe" type="text" optional="true" label="Data frame" value="" help="File name to save the raw R dataframe emitted by CBS or Fused Lasso, example dataframe.r"/>\n+ <param argument="--dataframe" type="text" label="Data frame" help="File name to save the raw R dataframe emitted by CBS or Fused Lasso, example dataframe.r"/>\n <param argument="--method" type="select" label="Segmentation method" help="">\n- <option value="cbs" selected="True">Circular Binary Segmentation CBS method,hybrid CBS</option>\n- <option value="flasso">Fused lasso, hybrid flasso</option>\n- <option value="haar">A pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>\n- <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>\n- <option value="hmm">experimental \xe2\x80\x93 a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two methods., hybrid hmm</option>\n- <option value="hmm-tumor">experimental \xe2\x80\x93 a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. 
In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>\n- <option value="hmm-germline">experimental \xe2\x80\x93 a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>\n+ <option value="cbs" selected="True">CBS: Circular Binary Segmentation (default, precise)</option>\n+ <option value="flasso">Flasso: Fused Lasso; smoother segments, fewer breakpoints</option>\n+ <option value="haar">Haar: Haar wavelet transform; detects abrupt changes</option>\n+ <option value="none">None: No segmentation; outputs bin-level data as segments</option>\n+ <option value="hmm">Hmm: Basic Hidden Markov Model (generic use)</option>\n+ <option value="hmm-tumor">Hmm-tumor: HMM tailored for tumor samples (somatic CNVs)</option>\n+ <option value="hmm-germline">Hmm-germline: HMM for germline (inherited) variants (diploid assumption)</option>\n </param>\n <param argument="--threshold" optional="true" type="integer" label="Significance threshold" min="1" help="To accept breakpoints during segmentation. For HMM methods, this is the smoothing window size"/>\n <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples"/>\n'

diff -r 9ca92159a21a -r 5b8288d21d5e nexus_ogt.xml
--- a/nexus_ogt.xml Mon Jan 20 16:41:15 2025 +0000
+++ b/nexus_ogt.xml Sat Mar 01 12:04:23 2025 +0000

[

@@ -30,11 +30,11 @@
             --output sample.cnv.txt
     ]]></command>
      <inputs>
-        <param name="input_segmented_file" type="data" format="tabular" label="Segmented Copy Ratio Data File (cns file)" help="The output of the CNVkit 'fix' sub-command" />
+        <param name="input_segmented_file" type="data" format="cns,cnr" label="Segmented Copy Ratio Data File (cns/cnr file)" help="The output of the CNVkit 'fix' sub-command" />
         <param name="input_vcf" type="data" format="vcf" label="VCF of SNVs for the Same Sample" help="Used to calculate b-allele frequencies" />
         <section name="advanced_settings" title="Advanced settings" expanded="false">
-            <param argument="--sample-id" optional="true" type="text" label="Sample ID" value="" help="Sample name to write in the genotype field of the output VCF file" />
-            <param argument="--normal-id" optional="true" type="text" label="Normal Id" value="" help="Corresponding normal sample ID in the input VCF" />
+            <param argument="--sample-id" type="text" label="Sample ID" help="Sample name to write in the genotype field of the output VCF file" />
+            <param argument="--normal-id" type="text" label="Normal Id" help="Corresponding normal sample ID in the input VCF" />
             <param argument="--min-variant-depth" optional="true" type="integer" label="Minimum Variant Depth" min="1" value="20" help="Minimum read depth for a SNP in the VCF to be counted. [Default: 20]" />
             <param argument="--zygosity-freq" optional="true" type="float" label="Zygosity Frequency" min="0" value="0.25" help="Ignore VCF's genotypes (GT field) and instead infer zygosity from allele frequencies. [Default if used without a number: 0.25]" />
             <param argument="--min-weight" optional="true" type="float" label="Minimum Weight" min="0" max="1" value="0.0" help="Minimum weight (between 0 and 1) for a bin to be included in the output. [Default: 0.0]" />
@@ -45,7 +45,7 @@
     </outputs>
        <tests>
         <test expect_num_outputs="1">
-            <param name="input_segmented_file" ftype="tabular" value="sample.cnr" />
+            <param name="input_segmented_file" ftype="cnr" value="sample.cnr" />
             <param name="input_vcf" ftype="vcf" value="sample.cnv.vcf" />
             <output name="CNVs_NexusOGT" file="sample.cnv.txt" />
         </test>
@@ -54,6 +54,24 @@
       This tool converts CNVkit log2 copy ratios and B-allele frequencies to the Nexus "Custom-OGT" format,
       enabling comprehensive CNV analysis with allelic imbalance insights. The output is compatible with Nexus software,
       facilitating advanced genomic interpretation and integration with other datasets.
+
+-----
+
+**Bin-level log2 ratios (.cnr)**
+
+Tabular file containing normalized log2 ratios, one row per genomic bin (a small sub-region of the genome). These bin-level values are used to detect raw copy number variations (CNVs) before segmentation.
+
+.. csv-table::
+   :header-rows: 0
+
+    "chromosome","Chromosome name (e.g., chr1, chrX)."
+    "start","Start position of the bin."
+    "end","End position of the bin."
+    "gene","Gene name(s) overlapping the bin (if applicable)."
+    "log2","Normalized log2 ratio (sample coverage / reference coverage)."
+    "depth","Average read depth in the bin."
+    "weight","Reliability weight of the bin (higher = more reliable)."
+
     ]]></help>
     <expand macro="citations" />
</tool>

diff -r 9ca92159a21a -r 5b8288d21d5e test-data/sample.cnv.vcf
--- a/test-data/sample.cnv.vcf Mon Jan 20 16:41:15 2025 +0000
+++ b/test-data/sample.cnv.vcf Sat Mar 01 12:04:23 2025 +0000

@@ -1,6 +1,6 @@
##fileformat=VCFv4.2
-##fileDate=20250120
-##source=CNVkit v0.9.11
+##fileDate=20250203
+##source=CNVkit v0.9.12
##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">