Mercurial > repos > artbio > lumpy_smoove
changeset 5:bd4135caa3fa draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 627bb5344b7d86c0b7ca7235ea3636269613dc32"
author | artbio |
---|---|
date | Fri, 25 Sep 2020 10:43:20 +0000 |
parents | 49da975ba395 |
children | ad8853ee9909 |
files | lumpy_smoove.xml test-data/result-6.vcf |
diffstat | 2 files changed, 91 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/lumpy_smoove.xml Thu Aug 27 17:14:25 2020 -0400 +++ b/lumpy_smoove.xml Fri Sep 25 10:43:20 2020 +0000 @@ -1,4 +1,4 @@ -<tool id="lumpy_smoove" name="lumpy_smoove" version="0.6.0"> +<tool id="lumpy_smoove" name="lumpy_smoove" version="0.2.5+galaxy5"> <description>find structural variants using the smoove workflow</description> <macros> <import>macro_lumpy_smoove.xml</import> @@ -17,9 +17,15 @@ ln -f -s $set_plan.normal_bam.metadata.bam_index normal.bam.bai && ln -s $set_plan.tumor_bam tumor.bam && ln -f -s $set_plan.tumor_bam.metadata.bam_index tumor.bam.bai && - #else + #elif $set_plan.plan_choice=='single': ln -s $set_plan.single_bam single.bam && ln -f -s $set_plan.single_bam.metadata.bam_index single.bam.bai && + #else: + #for $sample in $set_plan.cohort: + ln -s $sample ${sample.element_identifier}.bam && + ln -f -s $sample.metadata.bam_index ${sample.element_identifier}.bam.bai && + #end for + ls -la && #end if smoove call --name output @@ -43,6 +49,7 @@ <param name="plan_choice" type="select" label="Analyse a single Bam or a pair of Bam (eg normal/tumor)" display="radio"> <option value="pair" selected="true">A pair of Bam files</option> <option value="single">A single Bam</option> + <option value="cohort">a small cohort of Bam files (less than ~40)</option> </param> <when value="pair"> <param format="bam" name="normal_bam" type="data" label="BAM alignment from the normal sample"/> @@ -51,6 +58,9 @@ <when value="single"> <param format="bam" name="single_bam" type="data" label="BAM alignment from a single sample"/> </when> + <when value="cohort"> + <param name="cohort" type="data_collection" format="bam" label="A collection of bam files" multiple="true"/> + </when> </conditional> @@ -77,6 +87,24 @@ <tests> <test> + + <conditional name="set_plan"> + <param name="plan_choice" value="cohort"/> + <param name="cohort"> + <collection type="list"> + <element name="1" ftype="bam" value="celegans_RG_1.bam"/> + <element name="2" ftype="bam" value="celegans_RG_2.bam"/> + </collection> + </param> + </conditional> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" value="chrI-ce11.fa"/> + <param name="choices" value="yes"/> + <param name="bedmask" value="exclude.bed"/> + <param name="prpos" value="no"/> + <output name="vcf_call" ftype="vcf" file="result-6.vcf" lines_diff="8"/> + </test> + <test> <param name="reference_source_selector" value="history" /> <param name="ref_file" value="chrI-ce11.fa"/> <param name="normal_bam" value="celegans_RG_1.bam"/> @@ -132,7 +160,8 @@ There is a blog-post describing smoove in more detail here: https://brentp.github.io/post/smoove/ -Currently, this Galaxy tool only wraps smoove for 2 samples (bam normal and tumor inputs), +Currently, this Galaxy tool only wraps smoove for 1, 2 (bam normal and tumor inputs) or +a small collection of samples (<40), which translates in the command line:: <![CDATA[smoove call --name my-cohort --exclude $bed --fasta $fasta -p $threads --genotype [--removepr] /path/to/*.bam]]>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/result-6.vcf Fri Sep 25 10:43:20 2020 +0000 @@ -0,0 +1,59 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20200925 +##reference=reference.fa +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record"> +##INFO=<ID=STRANDS,Number=.,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)"> +##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation"> +##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants"> +##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants"> +##INFO=<ID=CIPOS95,Number=2,Type=Integer,Description="Confidence interval (95%) around POS for imprecise variants"> +##INFO=<ID=CIEND95,Number=2,Type=Integer,Description="Confidence interval (95%) around END for imprecise variants"> +##INFO=<ID=MATEID,Number=.,Type=String,Description="ID of mate breakends"> +##INFO=<ID=EVENT,Number=1,Type=String,Description="ID of event associated to breakend"> +##INFO=<ID=SECONDARY,Number=0,Type=Flag,Description="Secondary breakend in a multi-line variants"> +##INFO=<ID=SU,Number=.,Type=Integer,Description="Number of pieces of evidence supporting the variant across all samples"> +##INFO=<ID=PE,Number=.,Type=Integer,Description="Number of paired-end reads supporting the variant across all samples"> +##INFO=<ID=SR,Number=.,Type=Integer,Description="Number of split reads supporting the variant across all samples"> +##INFO=<ID=BD,Number=.,Type=Integer,Description="Amount of BED evidence supporting the variant across all samples"> +##INFO=<ID=EV,Number=.,Type=String,Description="Type of LUMPY evidence contributing to the variant call"> +##ALT=<ID=DEL,Description="Deletion"> +##ALT=<ID=DUP,Description="Duplication"> +##ALT=<ID=INV,Description="Inversion"> +##ALT=<ID=DUP:TANDEM,Description="Tandem duplication"> +##ALT=<ID=INS,Description="Insertion of novel sequence"> +##ALT=<ID=CNV,Description="Copy number variable region"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=SU,Number=1,Type=Integer,Description="Number of pieces of evidence supporting the variant"> +##FORMAT=<ID=PE,Number=1,Type=Integer,Description="Number of paired-end reads supporting the variant"> +##FORMAT=<ID=SR,Number=1,Type=Integer,Description="Number of split reads supporting the variant"> +##FORMAT=<ID=BD,Number=1,Type=Integer,Description="Amount of BED evidence supporting the variant"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype quality"> +##FORMAT=<ID=SQ,Number=1,Type=Float,Description="Phred-scaled probability that this site is variant (non-reference in this sample"> +##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth"> +##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observations, with partial observations recorded fractionally"> +##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of reference observations"> +##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of alternate observations"> +##FORMAT=<ID=RS,Number=1,Type=Integer,Description="Reference allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AS,Number=A,Type=Integer,Description="Alternate allele split-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=ASC,Number=A,Type=Integer,Description="Alternate allele clipped-read observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=RP,Number=1,Type=Integer,Description="Reference allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AP,Number=A,Type=Integer,Description="Alternate allele paired-end observation count, with partial observations recorded fractionally"> +##FORMAT=<ID=AB,Number=A,Type=Float,Description="Allele balance, fraction of observations from alternate allele, QA/(QR+QA)"> +##contig=<ID=chrI,length=15072434> +##smoove_version=0.2.5 +##smoove_count_stats=celegans-1:2869,2691,194,304 +##smoove_count_stats=celegans-2:2531,2421,134,276 +##source=LUMPY +##bcftools_annotateVersion=1.10.2+htslib-1.10.2 +##bcftools_annotateCommand=annotate -x INFO/PRPOS,INFO/PREND -Ou; Date=Fri Sep 25 09:37:11 2020 +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##bcftools_viewVersion=1.10.2+htslib-1.10.2 +##bcftools_viewCommand=view -c 1 -Oz -c 1 -o output-smoove.genotyped.vcf.gz; Date=Fri Sep 25 09:37:11 2020 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT celegans-1 celegans-2 +chrI 10416569 1 N <DUP> 170.7 . SVTYPE=DUP;SVLEN=981;END=10417550;STRANDS=-+:4;IMPRECISE;CIPOS=-769,29;CIEND=-30,636;CIPOS95=-165,8;CIEND95=-9,128;SU=4;PE=4;SR=0;AC=4;AN=4 GT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:16:130.28:-15,-4,-2:14:4:9:4:9:0:0:0:4:9:0.69 1/1:3:40.43:-4,-1,-1:7:4:3:4:3:0:0:0:4:3:0.43