Mercurial > repos > hepcat72 > lumpyexpress
changeset 0:e5150e64206a draft
planemo upload for repository https://github.com/hepcat72/robs_galaxy_tools/tree/master/tools/lumpyexpress commit 9c7264014db750b32a8fa78f511c7efbd12529d9-dirty
author | hepcat72 |
---|---|
date | Tue, 16 Oct 2018 15:55:49 -0400 |
parents | |
children | 107fa8e0b744 |
files | lumpy_wrapper.tcsh lumpyexpress.xml test-data/lumpy-test1.vcf test-data/test1-1.bam test-data/test1-2.bam test-data/test1-3.bam |
diffstat | 6 files changed, 257 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/tcsh -f
# (-f: do not read ~/.tcshrc, so user aliases/options cannot change behaviour)

# File: lumpy_wrapper.tcsh
#
# Prepares each input BAM (coordinate sort + index when needed), extracts the
# split reads (and, for paired-end runs, the discordant read pairs) of every
# BAM into their own sorted/indexed BAM files, and then calls structural
# variants with lumpyexpress (paired end) or lumpy (single end).

#USAGE:   lumpy_wrapper.tcsh PAIREDENDRUN OUTVCF BAMS
#EXAMPLE: lumpy_wrapper.tcsh 1 lumpy.vcf *.bam

#PAIREDENDRUN - 1=paired end 0=single end
#OUTVCF       - VCF file to write
#BAMS         - Any bam files (will be sorted if not sorted & indexed if not indexed)
#               NOTE: an unsorted BAM is replaced IN PLACE by its sorted version.

#REQUIRED DEPENDENCIES:
#lumpy
#samtools
#samblaster

#EXIT CODES:
#  1 sample name could not be derived     6 indexing splitters failed
#  2 sorting a BAM failed                 7 extracting discordants failed
#  3 replacing a BAM with sorted failed   8 indexing discordants failed
#  4 indexing a BAM failed                9 lumpyexpress failed
#  5 extracting splitters failed         10 lumpy failed
# 11 bad command line

# Make $status reflect a failure of ANY command in a pipeline, not only the
# last one (this is the default in recent tcsh versions; be explicit).
set anyerror

if ( $#argv < 3 ) then
    echo "USAGE: lumpy_wrapper.tcsh PAIREDENDRUN OUTVCF BAMS"
    exit 11
endif

# Take the arguments straight from argv (instead of re-splitting an echoed
# string) so that paths containing blanks survive intact.
set PAIREDENDRUN = "$argv[1]"
set OUTVCF       = "$argv[2]"
set INBAMS       = ( $argv[3-]:q )

if ( "$PAIREDENDRUN" != "0" && "$PAIREDENDRUN" != "1" ) then
    echo "PAIREDENDRUN must be 1 (paired end) or 0 (single end), got [$PAIREDENDRUN]"
    exit 11
endif

# Accumulators: comma-delimited file lists for lumpyexpress and the list of
# -sr options for lumpy.
set BAMS     = ''
set SPLTS    = ''
set DSCDS    = ''
set SPLTOPTS = ()

foreach b ( $INBAMS:q )

    echo
    echo "Preparing $b"

    echo
    echo Parsing sample name
    # Strip a trailing .bam extension only (never a ".bam" elsewhere in the
    # path); files with any other extension keep their full name.
    if ( "${b:e}" == "bam" ) then
        set SAMPLE = "${b:r}"
    else
        set SAMPLE = "$b"
    endif

    if ( "$SAMPLE" == "" ) then
        echo "Unable to parse sample name in $b"
        exit 1
    endif

    echo
    echo "Checking BAM $b"
    # Count header lines declaring coordinate sort order. grep -c consumes
    # the whole header, so samtools never sees a closed pipe.
    set nsorted = `samtools view -H "$b" | grep -c 'SO:coordinate'`

    if ( "$nsorted" == "" || "$nsorted" == "0" ) then
        echo
        echo "Sorting BAM $b"
        samtools sort -o "${b}.sort" -O BAM "$b"
        if ( $status ) then
            echo "Error sorting BAM $b"
            exit 2
        endif
        mv -f "${b}.sort" "$b"
        if ( $status ) then
            echo "Error renaming BAM $b.sort $b"
            exit 3
        endif
        # Any index that existed belongs to the unsorted file; drop it so it
        # is rebuilt below instead of being silently reused.
        rm -f "${b}.bai"
    endif

    if ( ! -e "${b}.bai" ) then
        echo
        echo "Indexing BAM $b"
        samtools index -b "$b"
        if ( $status ) then
            echo "Error indexing BAM"
            exit 4
        endif
    endif

    echo
    echo Getting splitters
    # Name-sort, let samblaster emit only split reads, then coordinate-sort.
    samtools sort -n -O sam "$b" \
        | samblaster -q -s /dev/stdout -o /dev/null \
        | samtools view -Sb - \
        | samtools sort - -o "${SAMPLE}.splitters.bam"

    if ( $status ) then
        echo "Getting splitters from $b failed"
        exit 5
    endif

    echo
    echo Indexing splitters
    samtools index -b "${SAMPLE}.splitters.bam"

    if ( $status ) then
        echo "Indexing splitters in $b failed"
        exit 6
    endif

    if ( $PAIREDENDRUN ) then

        echo
        echo Getting discordants
        # -F 1294 drops proper pairs, unmapped reads/mates, secondary
        # alignments and duplicates.
        samtools view -b -F 1294 "$b" \
            | samtools sort - -o "${SAMPLE}.discordants.bam"

        if ( $status ) then
            echo "Getting discordants from $b failed"
            exit 7
        endif

        echo
        echo Indexing discordants
        samtools index -b "${SAMPLE}.discordants.bam"

        if ( $status ) then
            echo "Indexing discordants in $b failed"
            exit 8
        endif

        if ( "$DSCDS" == "" ) then
            set DSCDS = "${SAMPLE}.discordants.bam"
        else
            set DSCDS = "${DSCDS},${SAMPLE}.discordants.bam"
        endif

        if ( "$SPLTS" == "" ) then
            set SPLTS = "${SAMPLE}.splitters.bam"
        else
            set SPLTS = "${SPLTS},${SAMPLE}.splitters.bam"
        endif

    else

        # One "-sr <spec>" pair per sample; kept as a word list so each spec
        # stays a single argument.
        set SPLTOPTS = ( $SPLTOPTS:q -sr "id:${SAMPLE},bam_file:${SAMPLE}.splitters.bam,back_distance:10,weight:1,min_mapping_threshold:20" )

    endif

    if ( "$BAMS" == "" ) then
        set BAMS = "$b"
    else
        set BAMS = "${BAMS},$b"
    endif

end

if ( $PAIREDENDRUN ) then

    echo
    echo "Running: lumpyexpress -B $BAMS -S $SPLTS -D $DSCDS -o $OUTVCF"
    lumpyexpress -B "$BAMS" -S "$SPLTS" -D "$DSCDS" -o "$OUTVCF"

    if ( $status ) then
        echo "lumpyexpress failed"
        exit 9
    endif

else

    echo
    echo "Running lumpy: lumpy -mw 4 -tt 0 $SPLTOPTS > $OUTVCF"
    lumpy -mw 4 -tt 0 $SPLTOPTS:q > "$OUTVCF"

    if ( $status ) then
        echo "lumpy failed"
        exit 10
    endif

endif

echo
echo DONE
<tool id="lumpyexpress_0_1" name="lumpyexpress" version="0.1.0">
    <!-- File: lumpyexpress.xml
         Galaxy wrapper around lumpy_wrapper.tcsh, which prepares the BAM
         files and runs lumpyexpress (paired end) or lumpy (single end). -->
    <description>Single or paired end SV detection</description>

    <requirements>
        <requirement type="package" version="2.7">python</requirement>
        <requirement type="package" version="0.8.3">pysam</requirement>
        <requirement type="package" version="1.13.1">numpy</requirement>
        <requirement type="package" version="0.2.14a">lumpy-sv</requirement>
        <requirement type="package" version="1.9">samtools</requirement>
        <requirement type="package" version="0.1.24">samblaster</requirement>
        <requirement type="package" version="0.6.5">sambamba</requirement>
        <requirement type="package" version="4.2.1">gawk</requirement>
        <!-- The wrapper script is written in tcsh, which is not installed on
             every Galaxy host. TODO: pin a version once one is chosen. -->
        <requirement type="package">tcsh</requirement>
    </requirements>

    <!-- lumpy prints a "Program: ... (v X.Y.Z)" banner with its usage text. -->
    <version_command>lumpy 2&gt;&amp;1 | grep 'Program:'</version_command>

    <command detect_errors="aggressive">
    <![CDATA[
        ## The wrapper sorts/indexes BAMs in place and writes the splitter and
        ## discordant BAMs next to them, so work on links inside the job
        ## directory instead of touching the input datasets themselves.
        #for $i, $bam in enumerate($bams)
            ln -s -f '$bam' 'input_${i}.bam' &&
        #end for

        ## Invoke through the interpreter: no dependence on the script's exec
        ## bit or on /bin/tcsh existing; -f skips the user's ~/.tcshrc.
        tcsh -f '$__tool_directory__/lumpy_wrapper.tcsh'
            '$paired'
            '$outfile'
            #for $i, $bam in enumerate($bams)
                'input_${i}.bam'
            #end for
    ]]>
    </command>

    <inputs>
        <param format="bam"
               name="bams"
               label="Bam file(s)"
               argument="argument positions 3-N"

               type="data"
               multiple="true"

               help="Optionally sorted/indexed whole bam files. Splitters and discordants will be extracted from this file."/>

        <param name="paired"
               label="Bam files contain paired end data"
               argument="argument position 1"

               type="boolean"
               truevalue="1"
               falsevalue="0"
               checked="false"/>
    </inputs>

    <outputs>
        <data format="vcf" name="outfile" />
    </outputs>

    <tests>
        <test>
            <param name="bams" value="test1-1.bam,test1-2.bam,test1-3.bam"/>
            <param name="paired" value="1"/>
            <output name="outfile" file="lumpy-test1.vcf"/>
        </test>
    </tests>

    <help>
    <![CDATA[
        Paired-end data is processed with lumpyexpress using only the required arguments.
        Single-end data is processed with lumpy using ``-mw 4 -tt 0 -sr id:<SAMPLE>,bam_file:<SAMPLE>.splitters.bam,back_distance:10,weight:1,min_mapping_threshold:20``. Submit an issue to the tool wrapper repo if you would like more options to be available: https://github.com/hepcat72/robs_galaxy_tools

        Split reads and discordant reads are extracted from the supplied bam files. Each bam file should represent a different sample.
    ]]>
    </help>

    <citations>
        <citation type="doi">10.1186/gb-2014-15-6-r84</citation>
    </citations>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lumpy-test1.vcf Tue Oct 16 15:55:49 2018 -0400 @@ -0,0 +1,36 @@ +##fileformat=VCFv4.2 +##source=LUMPY +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> +##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> +##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> +##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> +##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> +##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> +##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> +##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> +##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> +##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> 
+##INFO=<ID=PRPOS,Number=.,Type=String,Description="LUMPY probability curve of the POS breakend"> +##INFO=<ID=PREND,Number=.,Type=String,Description="LUMPY probability curve of the END breakend"> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##ALT=<ID=CNV,Description="Copy number variable region"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> +##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> +##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> +##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT JSS-Vmut-01__Read JSS-Vmut-12__Read JSS-Vmut-14__Read +NC_009016.1 1417 1_1 N [NC_009016.1:19752[N . . SVTYPE=BND;STRANDS=--:5;EVENT=1;MATEID=1_2;CIPOS=-128,9;CIEND=-291,9;CIPOS95=-92,2;CIEND95=-121,2;IMPRECISE;SU=5;PE=5;SR=0 GT:SU:PE:SR ./.:1:1:0 ./.:4:4:0 ./.:0:0:0 +NC_009016.1 19752 1_2 N [NC_009016.1:1417[N . . SVTYPE=BND;STRANDS=--:5;SECONDARY;EVENT=1;MATEID=1_1;CIPOS=-291,9;CIEND=-128,9;CIPOS95=-121,2;CIEND95=-92,2;IMPRECISE;SU=5;PE=5;SR=0 GT:SU:PE:SR ./.:1:1:0 ./.:4:4:0 ./.:0:0:0 +NC_009016.1 10 2 N <DUP> . . SVTYPE=DUP;STRANDS=-+:350;SVLEN=38187;END=38197;CIPOS=0,0;CIEND=0,0;CIPOS95=0,0;CIEND95=0,0;SU=350;PE=249;SR=101 GT:SU:PE:SR ./.:84:46:38 ./.:162:123:39 ./.:104:80:24