changeset 0:e5150e64206a draft

planemo upload for repository https://github.com/hepcat72/robs_galaxy_tools/tree/master/tools/lumpyexpress commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
author hepcat72
date Tue, 16 Oct 2018 15:55:49 -0400
parents
children 107fa8e0b744
files lumpy_wrapper.tcsh lumpyexpress.xml test-data/lumpy-test1.vcf test-data/test1-1.bam test-data/test1-2.bam test-data/test1-3.bam
diffstat 6 files changed, 257 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lumpy_wrapper.tcsh	Tue Oct 16 15:55:49 2018 -0400
@@ -0,0 +1,147 @@
+#!/bin/tcsh
+
+#USAGE:   lumpy_wrapper.tcsh PAIREDENDRUN OUTVCF    BAMS
+#EXAMPLE: lumpy_wrapper.tcsh 1            lumpy.vcf *.bam
+
+#PAIREDENDRUN - 1=paired end 0=single end; OUTVCF - path of the VCF file to write
+#BAMS - Any bam files (will be sorted in place if not sorted & indexed if not indexed)
+
+#REQUIRED DEPENDENCIES:
+#lumpy and lumpyexpress
+#samtools
+#samblaster (perl is also called by this wrapper)
+
+setenv PAIREDENDRUN `echo $argv | cut -f 1 -d " "`  # argument 1: paired-end flag
+setenv OUTVCF       `echo $argv | cut -f 2 -d " "`  # argument 2: output VCF path
+setenv MYARGV       `echo $argv | cut -f 3-999 -d " "`  # arguments 3 onward: BAM paths. NOTE review: split on spaces, so a path containing a space is broken up
+
+setenv BAMS ''      # comma-separated list of input BAMs, filled by the loop
+setenv SPLTS ''     # comma-separated list of splitter BAMs, paired-end runs only
+setenv DSCDS ''     # comma-separated list of discordant BAMs, paired-end runs only
+setenv SPLTOPTS ''  # space-separated -sr option groups for lumpy, single-end runs only
+
+foreach b ( $MYARGV )
+  # Prepare one input BAM per iteration and record its derived files
+  echo
+  echo Preparing $b
+
+  echo
+  echo Parsing sample name
+  set SAMPLE=`echo $b | perl -e 'while(<>){s/\.bam$//;print}'`
+
+  if ( $status ) then
+    echo "Unable to parse sample name in $b"
+    exit 1
+  endif
+  # Header check: perl exits 2 unless an SO:coordinate tag is present
+  echo
+  echo "Checking BAM $b"
+  samtools view -H $b | perl -e '$y=0;while(<>){if(/SO:coordinate/){$y=1;}}if($y == 0){exit(2)}'
+  # Non-zero status means unsorted, so sort and overwrite the input in place
+  if ( $status ) then
+    echo
+    echo "Sorting BAM $b"
+    samtools sort -o $b.sort -O BAM $b
+    if ( $status ) then
+      echo "Error sorting BAM $b"
+      exit 2
+    endif
+    mv -f $b.sort $b
+    if ( $status ) then
+      echo "Error renaming BAM $b.sort $b"
+      exit 3
+    endif
+  endif
+  # Build the .bai index only when it does not exist yet
+  perl -e 'unless(-e "$ARGV[0].bai"){exit(3)}' $b
+  if ( $status ) then
+    echo
+    echo "Indexing BAM $b"
+    samtools index -b $b
+    if ( $status ) then
+      echo "Error indexing BAM $b"
+      exit 4
+    endif
+  endif
+  # Split reads: name-sort, extract with samblaster, then sort the result
+  echo
+  echo Getting splitters
+  samtools sort -n -O sam $b | samblaster -q -s /dev/stdout -o /dev/null | samtools view -Sb - | samtools sort - -o ${SAMPLE}.splitters.bam
+
+  if ( $status ) then
+    echo "Getting splitters from $b failed"
+    exit 5
+  endif
+
+  echo
+  echo Indexing splitters
+  samtools index -b ${SAMPLE}.splitters.bam
+
+  if ( $status ) then
+    echo "Indexing splitters in $b failed"
+    exit 6
+  endif
+  # Discordant read pairs are extracted only for paired-end runs
+  if ( $PAIREDENDRUN ) then
+    echo
+    echo Getting discordants
+    samtools view -b -F 1294 $b | samtools sort - -o ${SAMPLE}.discordants.bam
+
+    if ( $status ) then
+      echo "Getting discordants from $b failed"
+      exit 7
+    endif
+
+    echo
+    echo Indexing discordants
+    samtools index -b ${SAMPLE}.discordants.bam
+
+    if ( $status ) then
+      echo "Indexing discordants in $b failed"
+      exit 8
+    endif
+    # Append to the comma-separated lists passed to lumpyexpress
+    if ( ${?DSCDS} > 0 && ${%DSCDS} > 0 )   setenv DSCDS "$DSCDS,${SAMPLE}.discordants.bam"
+    if ( ${?DSCDS} == 0 || ${%DSCDS} == 0 ) setenv DSCDS ${SAMPLE}.discordants.bam
+
+    if ( ${?SPLTS} > 0 && ${%SPLTS} > 0 )   setenv SPLTS "$SPLTS,${SAMPLE}.splitters.bam"
+    if ( ${?SPLTS} == 0 || ${%SPLTS} == 0 ) setenv SPLTS ${SAMPLE}.splitters.bam
+
+  else
+    # Single-end: collect one -sr option group per sample for lumpy
+    if ( ${?SPLTOPTS} > 0 && ${%SPLTOPTS} > 0 )   setenv SPLTOPTS "$SPLTOPTS -sr id:${SAMPLE},bam_file:${SAMPLE}.splitters.bam,back_distance:10,weight:1,min_mapping_threshold:20"
+    if ( ${?SPLTOPTS} == 0 || ${%SPLTOPTS} == 0 ) setenv SPLTOPTS "-sr id:${SAMPLE},bam_file:${SAMPLE}.splitters.bam,back_distance:10,weight:1,min_mapping_threshold:20"
+
+  endif
+  # Track every input BAM as a comma-separated list
+  if ( ${?BAMS} > 0 && ${%BAMS} > 0 )     setenv BAMS  "$BAMS,$b"
+  if ( ${?BAMS} == 0 || ${%BAMS} == 0 )   setenv BAMS  $b
+
+end
+
+if ( ! $PAIREDENDRUN ) then
+  # Single-end data: plain lumpy driven by the collected -sr options
+  echo
+  echo "Running lumpy: lumpy -mw 4 -tt 0 $SPLTOPTS > $OUTVCF"
+  lumpy -mw 4 -tt 0 $SPLTOPTS > $OUTVCF
+
+  if ( $status != 0 ) then
+    echo "lumpy failed"
+    exit 10
+  endif
+
+else
+  # Paired-end data: lumpyexpress with the comma-separated BAM lists
+  echo
+  echo "Running: lumpyexpress -B $BAMS -S $SPLTS -D $DSCDS -o $OUTVCF"
+  lumpyexpress -B $BAMS -S $SPLTS -D $DSCDS -o $OUTVCF
+
+  if ( $status != 0 ) then
+    echo "lumpyexpress failed"
+    exit 9
+  endif
+
+endif
+
+echo
+echo DONE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lumpyexpress.xml	Tue Oct 16 15:55:49 2018 -0400
@@ -0,0 +1,74 @@
+<tool id="lumpyexpress_0_1" name="lumpyexpress" version="0.1.0">
+    <description>Single or paired end SV detection</description>
+    <!-- Packages that must be available when the wrapper script runs -->
+    <requirements>
+        <requirement type="package" version="2.7">python</requirement>
+        <requirement type="package" version="0.8.3">pysam</requirement>
+        <requirement type="package" version="1.13.1">numpy</requirement>
+        <requirement type="package" version="0.2.14a">lumpy-sv</requirement>
+        <requirement type="package" version="1.9">samtools</requirement>
+        <requirement type="package" version="0.1.24">samblaster</requirement>
+        <requirement type="package" version="0.6.5">sambamba</requirement>
+        <requirement type="package" version="4.2.1">gawk</requirement>
+    </requirements>
+
+    <!-- version_command omitted: the template placeholder was never filled in and is not a runnable command -->
+    <!-- Wrapper arguments: paired flag, output VCF, then one quoted path per selected BAM -->
+    <command detect_errors="aggressive">
+        <![CDATA[
+            $__tool_directory__/lumpy_wrapper.tcsh
+            '$paired'
+            '$outfile'
+            #for $bam in $bams
+                '$bam'
+            #end for
+        ]]>
+    </command>
+
+    <inputs>
+        <param format="bam"
+               name="bams"
+               label="Bam file(s)"
+               argument="argument positions 3-N"
+
+               type="data"
+               multiple="true"
+
+               help="Optionally sorted/indexed whole bam files.  Splitters and discordants will be extracted from this file."/>
+
+        <param name="paired"
+               label="Bam files contain paired end data"
+               argument="argument position 1"
+
+               type="boolean"
+               truevalue="1"
+               falsevalue="0"
+               checked="no"
+               value="false"/>
+    </inputs>
+
+    <outputs>
+        <data format="vcf" name="outfile" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="bams" value="test1-1.bam,test1-2.bam,test1-3.bam"/>
+            <param name="paired" value="1"/>
+            <output name="outfile" file="lumpy-test1.vcf"/>
+        </test>
+    </tests>
+
+    <help>
+        <![CDATA[
+            Paired-end data is processed with lumpyexpress using only the required arguments.  Single-end data is processed with lumpy using `-mw 4 -tt 0 -sr id:<SAMPLE>,bam_file:<SAMPLE>.splitters.bam,back_distance:10,weight:1,min_mapping_threshold:20`.  Submit an issue to the tool wrapper repo if you would like more options to be available: https://github.com/hepcat72/robs_galaxy_tools
+
+            Split reads and discordant reads are extracted from the supplied bam files.  Each bam file should represent a different sample.
+        ]]>
+    </help>
+
+    <citations>
+        <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/lumpy-test1.vcf	Tue Oct 16 15:55:49 2018 -0400
@@ -0,0 +1,36 @@
+##fileformat=VCFv4.2
+##source=LUMPY
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend">
+##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	JSS-Vmut-01__Read	JSS-Vmut-12__Read	JSS-Vmut-14__Read
+NC_009016.1	1417	1_1	N	[NC_009016.1:19752[N	.	.	SVTYPE=BND;STRANDS=--:5;EVENT=1;MATEID=1_2;CIPOS=-128,9;CIEND=-291,9;CIPOS95=-92,2;CIEND95=-121,2;IMPRECISE;SU=5;PE=5;SR=0	GT:SU:PE:SR	./.:1:1:0	./.:4:4:0	./.:0:0:0
+NC_009016.1	19752	1_2	N	[NC_009016.1:1417[N	.	.	SVTYPE=BND;STRANDS=--:5;SECONDARY;EVENT=1;MATEID=1_1;CIPOS=-291,9;CIEND=-128,9;CIPOS95=-121,2;CIEND95=-92,2;IMPRECISE;SU=5;PE=5;SR=0	GT:SU:PE:SR	./.:1:1:0	./.:4:4:0	./.:0:0:0
+NC_009016.1	10	2	N	<DUP>	.	.	SVTYPE=DUP;STRANDS=-+:350;SVLEN=38187;END=38197;CIPOS=0,0;CIEND=0,0;CIPOS95=0,0;CIEND95=0,0;SU=350;PE=249;SR=101	GT:SU:PE:SR	./.:84:46:38	./.:162:123:39	./.:104:80:24
Binary file test-data/test1-1.bam has changed
Binary file test-data/test1-2.bam has changed
Binary file test-data/test1-3.bam has changed