changeset 7:555971edd46e draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/manta commit 569d2234f8a576d5c4fdae120a32418c50436ac2
author artbio
date Tue, 20 Feb 2024 08:31:27 +0000
parents cb5691381acb
children
files README.rst candidateSV.vcf.gz candidateSmallIndels.vcf.gz manta.xml manta_macros.xml somaticSV.vcf.gz test-data/candidateSV.vcf.gz test-data/candidateSmallIndels.vcf.gz test-data/conf_file_1.ini test-data/conf_file_2.ini test-data/conf_file_3.ini test-data/conf_file_4.ini test-data/conf_file_5.ini test-data/somaticSV.vcf.gz
diffstat 14 files changed, 270 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Thu Jun 08 17:36:38 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-# Wrapper of the variant caller 'MANTA', for use it as a Galaxy-based tool :
-
-Run the following commands in a terminal:
-
-planemo s
-
-Open in your browser:
-
-http://127.0.0.1:9090/
-
Binary file candidateSV.vcf.gz has changed
Binary file candidateSmallIndels.vcf.gz has changed
--- a/manta.xml	Thu Jun 08 17:36:38 2023 +0000
+++ b/manta.xml	Tue Feb 20 08:31:27 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="manta" name="Manta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
+<tool id="manta" name="Manta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>Manta calls structural variants (SVs) and indels from mapped paired-end sequencing reads.</description>
     <macros>
         <import>manta_macros.xml</import>
@@ -10,6 +10,9 @@
     @pipefail@
     @set_reference_fasta_filename@
     #set run_dir = './MantaWorkflow'
+    configManta=\$(which configManta.py) &&
+    configMantaDir=\$(dirname "\$configManta") &&
+    PATH="\$configMantaDir:\$PATH" &&
     cp $__tool_directory__/configManta.py.ini configManta.py.ini &&
     #if str( $bam_input.bam_input_selector ) == "not_tumor_bam":
     ln -s '$bam_input.normal_bam_file' normal.bam &&
@@ -26,7 +29,7 @@
     #end if
     #if str( $set_configuration.set_configuration_switch ) == "Customized":
         rm ./configManta.py.ini &&
-        python '$__tool_directory__/customConfigManta.py'
+        python2 '$__tool_directory__/customConfigManta.py'
         --minCandidateVariantSize '$set_configuration.minCandidateVariantSize'
         --rnaMinCandidateVariantSize '$set_configuration.rnaMinCandidateVariantSize'
         --minEdgeObservations '$set_configuration.minEdgeObservations'
@@ -112,7 +115,6 @@
                 <param name="useOverlapPairEvidence" type="integer" value="0" label="useOverlapPairEvidence" help="Set if an overlapping read pair will be considered as evidence. Set this value &lt;= 0 to skip overlapping read pairs."/>
             </when>
         </conditional>
-        <param name="config_file_check" type="boolean" label="output conf file" checked="False" help="Show configuration file on history"/>
         <param name="candidateSV_check" type="boolean" label="Unfiltered structural variants" checked="False"
                help="All unscored structural variant candidates"/>
         <param name="candidateSmallIndels_check" type="boolean" label="Unfiltered small indel candidates" checked="False"
@@ -122,9 +124,7 @@
                      reflect any information from the tumor sample" />
     </inputs>
     <outputs>
-        <data format="tabular" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini">
-            <filter>config_file_check == True</filter>
-        </data>
+        <data format="txt" name="conf_file" label="conf_file.ini" from_work_dir="./configManta.py.ini"/>
         <data format="vcf_bgzip" name="candidateSV" label="Manta unfiltered variants" from_work_dir="MantaWorkflow/results/variants/candidateSV.vcf.gz">
             <filter>candidateSV_check == True</filter>
         </data>
@@ -139,7 +139,7 @@
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="3">
             <param name="reference_source_selector" value="cached"/>
             <param name="index" value="hg19"/>
             <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
@@ -148,10 +148,11 @@
             <param name="set_configuration_switch" value="Default_config_file"/>
             <param name="callMemMb" value="1000"/>
             <param name="candidateSmallIndels_check" value="True"/>
+            <output name="conf_file" file="conf_file_1.ini" ftype="txt"/>
             <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
             <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <param name="reference_source_selector" value="cached"/>
             <param name="index" value="hg19"/>
             <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
@@ -160,10 +161,11 @@
             <param name="set_configuration_switch" value="Customized"/>
             <param name="callMemMb" value="1000"/>
             <param name="candidateSmallIndels_check" value="True"/>
+            <output name="conf_file" file="conf_file_2.ini" ftype="txt"/>
             <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
             <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
                 <param name="reference_source_selector" value="cached"/>
                 <param name="index" value="hg19"/>
                 <param name="bam_input_selector" value="tumor_bam" dbkey="hg19"/>
@@ -172,10 +174,11 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSmallIndels_check" value="True"/>
+                <output name="conf_file" file="conf_file_3.ini" ftype="txt"/>
                 <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
                 <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
                 <param name="reference_source_selector" value="history"/>
                 <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
                 <param name="bam_input_selector" value="tumor_bam"/>
@@ -184,10 +187,11 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSV_check" value="True"/>
+                <output name="conf_file" file="conf_file_4.ini" ftype="txt"/>
                 <output name="candidateSV" file="candidateSV.vcf.gz" decompress="true" lines_diff="6"/>
                 <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
-        <test>
+        <test expect_num_outputs="3">
                 <param name="reference_source_selector" value="history"/>
                 <param name="ref_file" ftype="fasta" value="hg19_region.fa"/>
                 <param name="bam_input_selector" value="tumor_bam"/>
@@ -196,6 +200,7 @@
                 <param name="set_configuration_switch" value="Default_config_file"/>
                 <param name="callMemMb" value="1000"/>
                 <param name="candidateSmallIndels_check" value="True"/>
+                <output name="conf_file" file="conf_file_5.ini" ftype="txt"/>
                 <output name="candidateSmallIndels" file="candidateSmallIndels.vcf.gz" decompress="true" lines_diff="6"/>
                 <output name="somaticSV" file="somaticSV.vcf.gz" decompress="true" lines_diff="6"/>
         </test>
--- a/manta_macros.xml	Thu Jun 08 17:36:38 2023 +0000
+++ b/manta_macros.xml	Tue Feb 20 08:31:27 2024 +0000
@@ -1,17 +1,18 @@
 <macros>
 
     <token name="@TOOL_VERSION@">1.6</token>
-    <token name="@VERSION_SUFFIX@">8</token>
+    <token name="@VERSION_SUFFIX@">9</token>
+    <token name="@PROFILE@">20.05</token>
     <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
 
     <token name="@set_reference_fasta_filename@"><![CDATA[
     #set $reference_fasta_filename = "localref.fa"
 
     #if str( $reference_source.reference_source_selector ) == "history":
-    ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
-    samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for Manta" >&2 &&
+        ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
+        samtools faidx '${reference_fasta_filename}' 2>&1 || echo "Error running samtools faidx for Manta" >&2 &&
     #else:
-    #set $reference_fasta_filename = str( $reference_source.index.fields.path )
+        #set $reference_fasta_filename = str( $reference_source.index.fields.path )
     #end if
     ]]></token>
 
@@ -28,8 +29,8 @@
 
     <xml name="requirements">
         <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">manta</requirement>
             <requirement type="package" version="1.7">samtools</requirement>
-            <requirement type="package" version="@TOOL_VERSION@">manta</requirement>
         </requirements>
     </xml>
 
Binary file somaticSV.vcf.gz has changed
Binary file test-data/candidateSV.vcf.gz has changed
Binary file test-data/candidateSmallIndels.vcf.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/conf_file_1.ini	Tue Feb 20 08:31:27 2024 +0000
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/conf_file_2.ini	Tue Feb 20 08:31:27 2024 +0000
@@ -0,0 +1,16 @@
+[manta]
+referenceFasta = /dummy/path/to/genome.fa
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+minPassSomaticScore = 30
+minSomaticScore = 10
+minCandidateVariantSize = 8
+minPassDiploidVariantScore = 20
+useOverlapPairEvidence = 0
+minPassDiploidGTScore = 15
+graphNodeMaxEdgeCount = 10
+minEdgeObservations = 3
+minDiploidVariantScore = 10
+minCandidateSpanningCount = 3
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+rnaMinCandidateVariantSize = 1000
+minScoredVariantSize = 50
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/conf_file_3.ini	Tue Feb 20 08:31:27 2024 +0000
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/conf_file_4.ini	Tue Feb 20 08:31:27 2024 +0000
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/conf_file_5.ini	Tue Feb 20 08:31:27 2024 +0000
@@ -0,0 +1,58 @@
+
+#
+# This section contains all configuration settings for the top-level manta workflow,
+#
+[manta]
+
+referenceFasta = /illumina/development/Isis/Genomes/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa
+
+# Run discovery and candidate reporting for all SVs/indels at or above this size
+# Separate option (to provide different default) used for runs in RNA-mode
+minCandidateVariantSize = 8
+rnaMinCandidateVariantSize = 1000
+
+# Remove all edges from the graph unless they're supported by this many 'observations'.
+# Note that one supporting read pair or split read usually equals one observation, but evidence is sometimes downweighted.
+minEdgeObservations = 3
+
+# If both nodes of an edge have an edge count higher than this, then skip evaluation of the edge.
+# Set to 0 to turn this filtration off
+graphNodeMaxEdgeCount = 10
+
+# Run discovery and candidate reporting for all SVs/indels with at least this
+# many spanning support observations
+minCandidateSpanningCount = 3
+
+# After candidate identification, only score and report SVs/indels at or above this size:
+minScoredVariantSize = 50
+
+# minimum VCF "QUAL" score for a variant to be included in the diploid vcf:
+minDiploidVariantScore = 10
+
+# VCF "QUAL" score below which a variant is marked as filtered in the diploid vcf:
+minPassDiploidVariantScore = 20
+
+# minimum genotype quality score below which single samples are filtered for a variant in the diploid vcf:
+minPassDiploidGTScore = 15
+
+# somatic quality scores below this level are not included in the somatic vcf:
+minSomaticScore = 10
+
+# somatic quality scores below this level are filtered in the somatic vcf:
+minPassSomaticScore = 30
+
+# Remote read retrieval is used ot improve the assembly of putative insertions by retrieving any mate reads in remote
+# locations with poor mapping quality, which pair to confidently mapping reads near the insertion locus. These reads
+# can help to fully assemble longer insertions, under certain circumstances this feature can add a very large runtime
+# burden. For instance, given the very high chimeric pair rates found in degraded FFPE samples, the runtime of the read
+# retrieval process can be unpredicable. For this reason the feature is disabled by default for somatic variant calling.
+# This feature can be enabled/disabled separately for germline and cancer calling below.
+#
+# Here "CancerCallingModes" includes tumor-normal subtraction and tumor-only calling. "GermlineCallingModes" includes
+# all other calling modes.
+enableRemoteReadRetrievalForInsertionsInGermlineCallingModes = 1
+enableRemoteReadRetrievalForInsertionsInCancerCallingModes = 0
+
+# Set if an overlapping read pair will be considered as evidence
+# Set to 0 to skip overlapping read pairs
+useOverlapPairEvidence = 0
Binary file test-data/somaticSV.vcf.gz has changed