Galaxy |

Changeset 0:477a07f387e0 (2020-11-12)

Next changeset 1:88b35d91a662 (2021-07-10)

Commit message:
"planemo upload for repository https://github.com/arq5x/lumpy-sv commit cce17262b21b0964c31eb983bac5e89ae92b8ee9"

added:
lumpy_sv.xml
macros.xml
test-data/blasted.bam
test-data/discordants.bam
test-data/sample.bam
test-data/sample.vcf
test-data/splitters.bam

diff -r 000000000000 -r 477a07f387e0 lumpy_sv.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lumpy_sv.xml Thu Nov 12 16:48:15 2020 +0000

[

b'@@ -0,0 +1,265 @@\n+<tool id="lumpy_sv" name="LUMPY" version="@WRAPPER_VERSION@">\n+ <description>is a probabilistic framework for structural variant discovery</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ <xml name="pe_options">\n+ <param name="read_length" type="integer" value="101" label="Length of sequenced reads" help="" />\n+ <param name="min_non_overlap" type="integer" value="101" label="Number of base pair positions that must be unique to each end of a read pair" help="Some library preps are created with large reads and small library sizes such that read overlap, in all over cases overlapping reads tends to be a sign of an error. We typically set this to read length (pairs cannot overlap)." />\n+ <param name="discordant_z" type="integer" value="5" label="Number of standard deviations away from the mean to be considered as a normal library size" help="" />\n+ <param name="back_distance" type="integer" value="10" label="Distance into the read to add to the breakpoint interval" help="" />\n+ <param name="min_mapping_threshold" type="integer" value="20" label="Minimum mapping quality (reported from the aligner) that a read must have to be considered" help="A quality of 1 will filter all reads with two or more equally good mappings." />\n+ <param name="weight" type="integer" value="1" label="Weight of each piece of evidence from this sample" help="" />\n+ </xml>\n+ <xml name="sr_options">\n+ <param name="sr_back_distance" type="integer" value="10" label="Distance around the +/- of the split to include in the breakpoint interval" help="A distance of 20 will created a breakpoint interval of size 40 centered at the split." />\n+ <param name="sr_min_mapping_threshold" type="integer" value="20" label="Minimum mapping quality (reported from the aligner) that a read must have to be considered" help="A quality of 1 will filter all reads with two or more equally good mappings." />\n+ <param name="sr_weight" type="integer" value="1" label="Weight of each piece of evidence from this sample" help="" />\n+ </xml>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">lumpy-sv</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+python \'$configure_job\' > lumpy_job.sh &&\n+chmod u+x lumpy_job.sh &&\n+cat lumpy_job.sh &&\n+./lumpy_job.sh > \'$result\'\n+ ]]></command>\n+ <configfiles>\n+ <configfile name="configure_job"><![CDATA[\n+## The Python script that gets put together here, will emit a shell script\n+## with the necessary commands for a traditional (non-express) LUMPY workflow.\n+## After running the python code, the resulting shell script is all that\'s\n+## needed to run LUMPY with all user-specified settings.\n+import os\n+import pysam\n+\n+preproc_cmds = []\n+lumpy_cmd_parts = [\'lumpy -mw ${general.mw} -tt ${general.tt} ${general.e}\']\n+\n+## symlink in all input bams, and there index files if available\n+full_bams = []\n+disc_bams = []\n+split_bams = []\n+#for $n,$bam_file in enumerate($input_bams):\n+## main input files are collated BAMs without index\n+os.symlink(\'$bam_file\', \'f${n}.bam\')\n+full_bams.append(\'f${n}.bam\')\n+#end for\n+#if $discordant_alns:\n+ #for $n, $disc_file in enumerate($discordant_alns):\n+os.symlink(\'$disc_file\', \'d${n}.bam\')\n+os.symlink(\'$disc_file.metadata.bam_index\', \'d${n}.bam.bai\')\n+disc_bams.append(\'d${n}.bam\')\n+ #end for\n+#end if\n+#if $split_alns:\n+ #for $n, $split_file in enumerate($split_alns):\n+os.symlink(\'$split_file\', \'s${n}.bam\')\n+os.symlink(\'$split_file.metadata.bam_index\', \'s${n}.bam.bai\')\n+split_bams.append(\'s${n}.bam\')\n+ #end for\n+#end if\n+if not disc_bams and not split_bams:\n+ raise Exception(\'Either discordant or split alignments are required as input\')\n+\n+## make pe and sr params available in the Python code\n+#set $pe_rg_specs = {}\n+#for $per_rg_pe in $pe.rg_specific:\n+ #sile'..b' <param name="tt" type="integer" value="0" label="Trim threshold (-tt)" help="" />\n+ \n+ \n+ \n+ \n+ <param name="stat_tail_n" type="integer" min="1" value="100000" label="stat_tail_n" help="" />\n+ </section>\n+ \n+ <section name="pe" title="Paired-end options (-pe)" expanded="false">\n+ <repeat name="rg_specific" title="Read group-specific settings" default="0" min="0"\n+ help="Define paired-end options to be applied to one specific read group.">\n+ <param name="rg_id" type="text"\n+ label="Read group to apply settings to"\n+ help="All settings below will only be applied to reads belonging to the specified read group. The value provided here must correspond to one of the read group IDs defined in the main input and the discordant pairs datasets." />\n+ <expand macro="pe_options" />\n+ </repeat>\n+ <section name="default" title="Default settings for unconfigured read groups" expanded="true">\n+ <expand macro="pe_options" />\n+ </section>\n+ </section>\n+ \n+ <section name="sr" title="Split-read options (-sr)" expanded="false">\n+ <repeat name="sm_specific" title="Sample-specific settings" default="0" min="0"\n+ help="Define split-reads options to be applied to one specific sample.">\n+ <param name="rg_sm" type="text"\n+ label="Sample to apply settings to"\n+ help="All settings below will only be applied to reads of the specified sample. The value provided here must correspond to one of the read group SM values defined in the main input and the split-reads datasets." />\n+ <expand macro="sr_options" />\n+ </repeat>\n+ <section name="default" title="Default settings for unconfigured samples" expanded="true">\n+ <expand macro="sr_options" />\n+ </section>\n+ </section>\n+ \n+ \n+ \n+ \n+ \n+ </inputs>\n+ <outputs>\n+ <data name="result" format="vcf" label="${tool.name} on ${on_string}" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ <param name="input_bams" ftype="qname_sorted.bam" value="blasted.bam" />\n+ <param name="discordant_alns" ftype="bam" value="discordants.bam" />\n+ <section name="general">\n+ <param name="e" value="false" />\n+ <param name="stat_tail_n" value="1" />\n+ </section>\n+ <section name="pe">\n+ <section name="default">\n+ <param name="weight" value="2" />\n+ </section>\n+ </section>\n+ <output name="result" ftype="vcf" file="sample.vcf" />\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+LUMPY\n+=============\n+\n+A probabilistic framework for structural variant discovery.\n+\n+For more information see the LUMPY documentation_.\n+\n+.. _documentation: https://github.com/arq5x/lumpy-sv\n+\n+ ]]></help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r 477a07f387e0 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Nov 12 16:48:15 2020 +0000

@@ -0,0 +1,10 @@
+<macros>
+    <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token>
+    <token name="@TOOL_VERSION@">0.3.1</token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>

diff -r 000000000000 -r 477a07f387e0 test-data/blasted.bam

Binary file test-data/blasted.bam has changed

diff -r 000000000000 -r 477a07f387e0 test-data/discordants.bam

Binary file test-data/discordants.bam has changed

diff -r 000000000000 -r 477a07f387e0 test-data/sample.bam

Binary file test-data/sample.bam has changed

diff -r 000000000000 -r 477a07f387e0 test-data/sample.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.vcf Thu Nov 12 16:48:15 2020 +0000

@@ -0,0 +1,35 @@
+##fileformat=VCFv4.2
+##source=LUMPY
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
+##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1
+chr8 245202 1 N <DEL> . . SVTYPE=DEL;STRANDS=+-:3;SVLEN=-229;END=245431;CIPOS=-10,64;CIEND=-51,9;CIPOS95=-1,33;CIEND95=-22,3;IMPRECISE;SU=3;PE=3 GT:SU:PE ./.:3:3
+chr8 246846 2 N <DEL> . . SVTYPE=DEL;STRANDS=+-:2;SVLEN=-176;END=247022;CIPOS=-10,186;CIEND=-159,21;CIPOS95=0,137;CIEND95=-95,13;IMPRECISE;SU=2;PE=2 GT:SU:PE ./.:2:2

diff -r 000000000000 -r 477a07f387e0 test-data/splitters.bam

Binary file test-data/splitters.bam has changed