Mercurial > repos > artbio > lumpy_smoove

--- a/lumpy_smoove.xml	Thu Aug 27 17:14:25 2020 -0400
+++ b/lumpy_smoove.xml	Fri Sep 25 10:43:20 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="lumpy_smoove" name="lumpy_smoove" version="0.6.0">
+<tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.5+galaxy5">
     <description>find structural variants using the smoove workflow</description>
     <macros>
         <import>macro_lumpy_smoove.xml</import>
@@ -17,9 +17,15 @@
         ln -f -s $set_plan.normal_bam.metadata.bam_index normal.bam.bai &&
         ln -s $set_plan.tumor_bam tumor.bam &&
         ln -f -s $set_plan.tumor_bam.metadata.bam_index tumor.bam.bai &&
-    #else
+    #elif $set_plan.plan_choice=='single':
         ln -s $set_plan.single_bam single.bam &&
         ln -f -s $set_plan.single_bam.metadata.bam_index single.bam.bai &&
+    #else:
+        ## Cohort plan: link each BAM and its index into the working directory; paths are quoted so identifiers containing spaces stay a single shell argument
+        #for $sample in $set_plan.cohort:
+            ln -s '$sample' '${sample.element_identifier}.bam' &&
+            ln -f -s '$sample.metadata.bam_index' '${sample.element_identifier}.bam.bai' &&
+        #end for
     #end if

     smoove call --name output
@@ -43,6 +49,7 @@
             <param name="plan_choice" type="select" label="Analyse a single Bam or a pair of Bam (eg normal/tumor)" display="radio">
                 <option value="pair" selected="true">A pair of Bam files</option>
                 <option value="single">A single Bam</option>
+                <option value="cohort">A small cohort of Bam files (less than ~40)</option>
             </param>
             <when value="pair">
                 <param format="bam" name="normal_bam" type="data" label="BAM alignment from the normal sample"/>
@@ -51,6 +58,9 @@
             <when value="single">
                 <param format="bam" name="single_bam" type="data" label="BAM alignment from a single sample"/>
             </when>
+            <when value="cohort"> <!-- flat list only: the command iterates the elements as individual BAM datasets -->
+                <param name="cohort" type="data_collection" collection_type="list" format="bam" label="A collection of bam files" multiple="true"/>
+            </when>
         </conditional>


@@ -77,6 +87,24 @@

     <tests>
         <test>
+            <!-- Cohort plan: two BAM files supplied as a list collection, history reference, output compared to result-6.vcf -->
+            <conditional name="set_plan">
+                <param name="plan_choice" value="cohort"/>
+                <param name="cohort">
+                    <collection type="list">
+                        <element name="1" ftype="bam" value="celegans_RG_1.bam"/>
+                        <element name="2" ftype="bam" value="celegans_RG_2.bam"/>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="reference_source_selector" value="history" />
+            <param name="ref_file" value="chrI-ce11.fa"/>
+            <param name="choices" value="yes"/>
+            <param name="bedmask" value="exclude.bed"/>
+            <param name="prpos" value="no"/>
+            <output name="vcf_call" ftype="vcf" file="result-6.vcf" lines_diff="8"/>
+        </test>
+        <test>
             <param name="reference_source_selector" value="history" />
             <param name="ref_file" value="chrI-ce11.fa"/>
             <param name="normal_bam" value="celegans_RG_1.bam"/>
@@ -132,7 +160,8 @@
 There is a blog-post describing smoove in more detail
 here: https://brentp.github.io/post/smoove/

-Currently, this Galaxy tool only wraps smoove for 2 samples (bam normal and tumor inputs),
+Currently, this Galaxy tool only wraps smoove for 1 sample, 2 samples (normal and tumor bam inputs) or
+a small collection of samples (&lt;40),
 which translates to the command line::

     <![CDATA[smoove call --name my-cohort --exclude $bed --fasta $fasta -p $threads --genotype [--removepr] /path/to/*.bam]]>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result-6.vcf	Fri Sep 25 10:43:20 2020 +0000
@@ -0,0 +1,59 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##fileDate=20200925
+##reference=reference.fa
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
+##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants">
+##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants">
+##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends">
+##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend">
+##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants">
+##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples">
+##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples">
+##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples">
+##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples">
+##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=DUP:TANDEM,Description="Tandem duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=CNV,Description="Copy number variable region">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant">
+##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant">
+##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant">
+##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality">
+##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample">
+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally">
+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations">
+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations">
+##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally">
+##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)">
+##contig=<ID=chrI,length=15072434>
+##smoove_version=0.2.5
+##smoove_count_stats=celegans-1:2869,2691,194,304
+##smoove_count_stats=celegans-2:2531,2421,134,276
+##source=LUMPY
+##bcftools_annotateVersion=1.10.2+htslib-1.10.2
+##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Fri Sep 25 09:37:11 2020
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##bcftools_viewVersion=1.10.2+htslib-1.10.2
+##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Sep 25 09:37:11 2020
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	celegans-1	celegans-2
+chrI	10416569	1	N	<DUP>	170.7	.	SVTYPE=DUP;SVLEN=981;END=10417550;STRANDS=-+:4;IMPRECISE;CIPOS=-769,29;CIEND=-30,636;CIPOS95=-165,8;CIEND95=-9,128;SU=4;PE=4;SR=0;AC=4;AN=4	GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB	1/1:16:130.28:-15,-4,-2:14:4:9:4:9:0:0:0:4:9:0.69	1/1:3:40.43:-4,-1,-1:7:4:3:4:3:0:0:0:4:3:0.43