Mercurial > repos > iuc > lumpy_prep

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lumpy_preprocessing.xml	Thu Nov 12 16:48:34 2020 +0000
@@ -0,0 +1,58 @@
+<tool id="lumpy_prep" name="LUMPY preprocessing" version="@WRAPPER_VERSION@">
+    <description>extracts discordant read pairs and split-read alignments from a BAM dataset</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">lumpy-sv</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+## pipefail: without it only the last stage of each pipe sets the exit code, so a failing collate/samblaster/extractSplitReads_BwaMem stage would go unnoticed
+set -o pipefail &&
+samtools collate -O --output-fmt sam '$bam_in' | samblaster $dup_handling --addMateTags | samtools view -bS - > blasted.bam &&
+samtools view -b -F 1294 blasted.bam > discordants.unsorted.bam &&
+samtools view -h blasted.bam | extractSplitReads_BwaMem -i stdin | samtools view -Sb - > splitters.unsorted.bam &&
+samtools sort discordants.unsorted.bam > discordants.bam &&
+samtools sort splitters.unsorted.bam > splitters.bam
+    ]]></command>
+    <inputs>
+        <param name="bam_in" type="data" format="bam" label="BAM input dataset" />
+        <param name="dup_handling" type="select"
+               label="Duplicate detection and removal"
+               help="Duplicate reads can be detected and removed with the help of samblaster. If you have already marked duplicates in the input dataset with another tool, you can choose to skip duplicate detection and remove only pre-marked duplicates. Not handling duplicates is only recommended if you have already deduplicated your input with other tools.">
+            <option value="--removeDups">Detect and remove duplicates (samblaster -r)</option>
+            <option value="--acceptDupMarks --removeDups">Remove duplicates marked in input data (samblaster -a -r)</option>
+            <option value="">Don't handle duplicates</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="blasted_bam" format="qname_sorted.bam" from_work_dir="blasted.bam" label="${tool.name}: Collated and groomed on ${on_string}" />
+        <data name="discordants_bam" format="bam" from_work_dir="discordants.bam" label="${tool.name}: Discordant read pairs on ${on_string}" />
+        <data name="splitters_bam" format="bam" from_work_dir="splitters.bam" label="${tool.name}: Split-read alignments on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="bam_in" ftype="bam" value="sample.bam" />
+            <param name="dup_handling" value="" />
+            <output name="blasted_bam" ftype="qname_sorted.bam" file="blasted.bam" />
+            <output name="discordants_bam" ftype="bam" file="discordants.bam" />
+            <output name="splitters_bam" ftype="bam" file="splitters.bam" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+This tool implements the mapped reads preprocessing `suggested for LUMPY <https://github.com/arq5x/lumpy-sv#pre-processing>`__. It uses samblaster_, samtools and LUMPY's extractSplitReads_BwaMem script to collate and groom an input BAM dataset (preferably produced with bwa-mem) and to extract discordant read pairs and split-read alignments from it. The three output datasets can serve as the three inputs to LUMPY.
+
+.. _samblaster: https://github.com/GregoryFaust/samblaster
+    ]]></help>
+    <expand macro="citations">
+        <citation type="doi">10.1093/bioinformatics/btu314</citation>
+        <citation type="bibtex">
+            @misc{SamTools_github,
+            title={SAMTools GitHub page},
+            url = {https://github.com/samtools/samtools},}
+        </citation>
+    </expand>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Nov 12 16:48:34 2020 +0000
@@ -0,0 +1,10 @@
+<macros> <!-- Shared tokens and XML snippets imported by lumpy_preprocessing.xml -->
+    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token> <!-- tool version attribute: @TOOL_VERSION@ with a +galaxy0 suffix; NOTE(review): nested-token expansion may depend on this declaration order, so confirm before reordering -->
+    <token name="@TOOL_VERSION@">0.3.1</token> <!-- version pinned for the lumpy-sv package requirement -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
+            <yield />
+        </citations>
+    </xml> <!-- "citations": fixed DOI above, then whatever the expanding tool nests inside its <expand macro="citations"> element -->
+</macros>
Binary file test-data/blasted.bam has changed
Binary file test-data/discordants.bam has changed
Binary file test-data/sample.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.vcf	Thu Nov 12 16:48:34 2020 +0000
@@ -0,0 +1,35 @@
+##fileformat=VCFv4.2
+##source=LUMPY
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
+##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	sample1
+chr8	245202	1	N	<DEL>	.	.	SVTYPE=DEL;STRANDS=+-:3;SVLEN=-229;END=245431;CIPOS=-10,64;CIEND=-51,9;CIPOS95=-1,33;CIEND95=-22,3;IMPRECISE;SU=3;PE=3	GT:SU:PE	./.:3:3
+chr8	246846	2	N	<DEL>	.	.	SVTYPE=DEL;STRANDS=+-:2;SVLEN=-176;END=247022;CIPOS=-10,186;CIEND=-159,21;CIPOS95=0,137;CIEND95=-95,13;IMPRECISE;SU=2;PE=2	GT:SU:PE	./.:2:2
Binary file test-data/splitters.bam has changed