# HG changeset patch # User iuc # Date 1611758903 0 # Node ID 1fbe84e8a740cebe7618ae50f350814105e7c657 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/strelka commit 2e445e7c519b2b77498cb74c03ca6ed12b22423a" diff -r 000000000000 -r 1fbe84e8a740 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,220 @@ + + + 2.9.10 + galaxy0 + small variant caller + + + strelka + samtools + + + + + 10.1038/s41592-018-0051-x + + + + + + $target_file && + tabix -p vcf $target_file && + #else: + ln -s '${sites.whitelist}' $target_file && + ln -s '${sites.whitelist.metadata.tabix_index}' ${target_file}.tbi && + #end if + #if str($sites.use_whitelist_as) == 'indel_candidates': + #silent $reg_options.extend(['--indelCandidates', $target_file]) + #else: + #silent $reg_options.extend(['--forcedGT', $target_file]) + #end if + #end for + #if str($regions.restrict_to_region) == 'regions_from_file': + #silent $reg_options.append('--callRegions') + #set $target_file = 'input_callregions.bed.gz' + #if $regions.callRegions.ext == 'bed': + ## Plain BED input: bgzip -c writes to stdout, so redirect it into the target file before indexing. + bgzip -c '$regions.callRegions' > $target_file && + tabix -p bed $target_file && + #else: + ## Input is bgzipped and tabix-indexed already: symlink the data and the index kept in the dataset metadata. + ln -s '$regions.callRegions' $target_file && + ln -s '$regions.callRegions.metadata.tabix_index' ${target_file}.tbi && + #end if + #silent $reg_options.append($target_file) + #end if + #set $region_spec = ' '.join($reg_options) + #if str($ref_cond.ref_sel) == 'history': + #set $reference_fasta_fn = 'input_ref.fasta' + ln -s '$ref_cond.ref' $reference_fasta_fn && + samtools faidx $reference_fasta_fn && + #else + #set $reference_fasta_fn = str($ref_cond.ref.fields.path) + #end if + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + +
+
+ + + + + + + + + + +*Sequencing Data* + +The input sequencing reads are expected to come from a paired-end sequencing assay. Any input other than paired-end reads is ignored by default except to double-check for putative somatic variant evidence in the normal sample during somatic variant analysis. Read lengths above ~400 bases are not tested. + +*Alignment Files* + +All input sequencing reads should be mapped by an external tool and provided as input in `BAM <https://samtools.github.io/hts-specs/SAMv1.pdf>`_ or `CRAM <https://samtools.github.io/hts-specs/CRAMv3.pdf>`_ format. + +The following limitations apply to the input BAM/CRAM alignment records: + +- Alignments cannot contain the "=" character in the SEQ field. +- RG (read group) tags are ignored -- each alignment file must represent one sample. +- Alignments with basecall quality values greater than 70 will trigger a runtime error (these are not supported on the assumption that the high basecall quality indicates an offset error). + +*VCF Files* + +Input `VCF <http://samtools.github.io/hts-specs/VCFv4.1.pdf>`_ files are accepted for a number of roles as described below. All input VCF records are checked for compatibility with the given reference genome, in addition to role-specific checks described below. If any VCF record's REF field is not compatible with the reference genome a runtime error will be triggered. 'Compatible with the reference genome' means that, for each VCF record's REF base, either (1) it matches the corresponding reference genome base, (2) the VCF record's REF base is 'N', or (3) the reference genome base is any ambiguous IUPAC base code (all ambiguous base codes are converted to 'N' while importing the reference). + + +Strelka2 is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts (Strelka Germline) and somatic variation in tumor/normal sample pairs (Strelka Somatic). 
+ +Strelka accepts input read mappings from BAM or CRAM files, and optionally candidate and/or forced-call alleles from VCF. It reports all small variant predictions in VCF 4.1 format. Germline variant reporting uses the gVCF conventions to represent both variant and reference call confidence. For best somatic indel performance, Strelka is designed to be run with the Manta structural variant and indel caller, which provides additional indel candidates up to a given maximum indel size (by default this is 49). By design, Manta and Strelka run together with default settings provide complete coverage over all indel sizes (in addition to all SVs and SNVs) for clinical somatic and germline analysis scenarios. + + `_. + ]]> +
diff -r 000000000000 -r 1fbe84e8a740 strelka_germline.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/strelka_germline.xml Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,250 @@ + + + @DESCRIPTION@ for germline variation in small cohorts + + macros.xml + + + 1 and str($expert_settings.evs.selector) == "enableEVS" and $expert_settings.evs.reportEVSFeatures + echo "Reporting of EVS features can only be used with a single input sample" 1>&2; exit 1 + #else + ## initialize + #set $bam_inputs = [] + #for $i, $s in enumerate($bam): + #set $target_file = 'input_sample_%d.%s' % ($i, $s.ext) + ln -s '$s' $target_file && + #if $s.is_of_type('bam') + ln -s '$s.metadata.bam_index' ${target_file}.bai && + #elif $s.is_of_type('cram') + ln -s '$s.metadata.cram_index' ${target_file}.crai && + #end if + #silent $bam_inputs.extend(['--bam', $target_file]) + #end for + #set $bam_spec = ' '.join($bam_inputs) + ## Strelka requires both the --ploidy vcf and the --noCompress bed + ## to be bgzipped and tabixed. + ## Same for the --callRegions bed, but that's handled inside + ## the shared INIT code. 
+ #if $pl.ploidy + #if $pl.ploidy.ext == 'vcf' + ln -s '$pl.ploidy' input_ploidy.vcf && + bgzip -c input_ploidy.vcf > input_ploidy.vcf.gz && + tabix -p vcf input_ploidy.vcf.gz && + #else + ## File is bgzipped and tabixed already + ## -> just symlink data and index + ln -s '$pl.ploidy' input_ploidy.vcf.gz && + ln -s '$pl.ploidy.metadata.tabix_index' input_ploidy.vcf.gz.tbi && + #end if + #end if + #if $oo.gvcf.emit_gvcfs == 'yes' and $oo.gvcf.noCompress + ln -s '$oo.gvcf.noCompress' input_nocompress.bed && + bgzip -c input_nocompress.bed > input_nocompress.bed.gz && + tabix -p bed input_nocompress.bed.gz && + #end if + @INIT@ + + ## create workflow + configureStrelkaGermlineWorkflow.py + $bam_spec + #if str($pl.callContinuousVf) + --callContinuousVf '$pl.callContinuousVf' + #end if + #if $pl.ploidy + --ploidy input_ploidy.vcf.gz + #end if + #if $oo.gvcf.emit_gvcfs == 'yes' and $oo.gvcf.noCompress + --noCompress input_nocompress.bed.gz + #end if + $expert_settings.s_e_e + @CREATE@ + + ## run workflow + @RUN@ + + ## decompress results + #if $oo.vcf_type == "decompressed" + ## we decompress just the main variants file + ## per-sample gvcf files are always emitted as a collection of + ## compressed files. + && bgzip -d results/results/variants/variants.vcf.gz + && mv results/results/variants/variants.vcf results/results/variants/variants_out + #else + && mv results/results/variants/variants.vcf.gz results/results/variants/variants_out + #end if + #end if + ]]> + + + +## parser cannot handle indents +[StrelkaGermline] +minMapq = $strelka.minMapq +@CONFIG@ + + + + + + + + + + + + + + + + + +
+ + +
+
+ + + + + + + + + + + +
+
+ + +
+
+ + + + + + + + + oo['gvcf']['emit_gvcfs'] == 'yes' + + + + + + + + + + +
+ +
+
+ + + + +
+ + + + + + + + + + +
+ + + + + + + +
+ +
+
+ +
+ +
+ + + + + + + + +
+ + + + +
+
+ +
+
+ + +
+ + + + + + + + + + + + + + + +
+
+ + +
diff -r 000000000000 -r 1fbe84e8a740 test-data/genome_test1.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_test1.vcf Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,95 @@ +##fileformat=VCFv4.1 +##fileDate=. +##source=strelka +##source_version=2.9.10 +##startTime=. +##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.bam --bam input_sample_1.bam --bam input_sample_2.bam --disableSequenceErrorEstimation --config=/tmp/tmpmywmzdlj/job_working_directory/000/7/configs/tmpzw3187cr --referenceFasta input_ref.fasta --runDir results +##reference=file:///tmp/tmpmywmzdlj/job_working_directory/000/7/working/input_ref.fasta +##contig= +##content=strelka germline small-variant calls +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##Depth_demo20=79.00 +##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12891 +demo20 1 . T . . LowGQX END=899;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP .:.:0:0:0 +demo20 900 . G . . LowGQX;LowDepth END=906;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:5:2:0:2 +demo20 907 . T . . LowGQX END=931;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:7:4:0:3 +demo20 932 . G . . PASS END=990;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:15:8:0:6 +demo20 991 . C G 75 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:71:9:9:1:5,4:1,4:4,0:2.8:PASS:72,0,123 +demo20 992 . C . . PASS END=1084;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:24:14:0:9 +demo20 1085 . T . . PASS . GT:GQX:DP:DPF:MIN_DP 0/0:22:21:1:21 +demo20 1086 . G . . PASS END=1148;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:54:22:1:19 +demo20 1148 . C CTAT 144 PASS CIGAR=1M3I;RU=TAT;REFREP=1;IDREP=2;MQ=59 GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL 0/1:114:27:20:11,8:5,3:6,5:PASS:111,0,147 +demo20 1149 . T . . 
PASS END=1270;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:39:19:0:14 +demo20 1271 . A G 268 PASS SNVHPOL=4;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:126:30:18:0:8,10:7,6:1,4:-18.6:PASS:169,0,123 +demo20 1272 . G . . PASS END=1507;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:39:20:1:14 +demo20 1508 . A G 312 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:172:30:22:1:10,12:4,6:6,6:-21.5:PASS:191,0,169 +demo20 1509 . G . . PASS END=1582;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:27:17:0:10 +demo20 1583 . A . . PASS END=1669;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:21:15:0:8 +demo20 1670 . C . . PASS END=1705;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:45:19:0:16 +demo20 1706 . C T 608 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0 +demo20 1707 . G . . PASS END=1743;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:54:21:0:19 +demo20 1744 . C T 312 PASS SNVHPOL=3;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156 +demo20 1745 . G . . PASS END=1845;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:40:21:1:16 +demo20 1846 . C T 165 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224 +demo20 1847 . G . . PASS END=1872;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:60:23:1:21 +demo20 1873 . C T 122 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360 +demo20 1874 . C . . PASS END=2073;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:45:21:0:16 +demo20 2074 . T C 246 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191 +demo20 2075 . A . . PASS END=2198;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:63:31:1:22 +demo20 2199 . G A 297 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189 +demo20 2200 . C . . 
PASS END=2300;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:60:30:1:26 +demo20 2301 . G T 369 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158 +demo20 2302 . T . . PASS END=2454;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:72:30:0:25 +demo20 2455 . T C 889 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0 +demo20 2456 . G . . PASS END=2511;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:87:36:2:30 +demo20 2512 . A G 531 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148 +demo20 2513 . T . . PASS END=2639;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:81:37:1:28 +demo20 2640 . C T 751 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0 +demo20 2641 . T . . PASS END=2659;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:60:23:0:21 +demo20 2660 . G T 567 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0 +demo20 2661 . G . . PASS END=3037;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:41:27:1:18 +demo20 3038 . C . . PASS END=3053;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:27:20:1:17 +demo20 3054 . G C 214 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153 +demo20 3055 . C . . PASS END=3365;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:51:25:1:18 +demo20 3366 . G T 753 PASS SNVHPOL=4;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0 +demo20 3367 . G . . PASS END=3536;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:60:28:0:21 +demo20 3537 . C T 191 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256 +demo20 3538 . T . . PASS END=3664;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:54:29:1:19 +demo20 3664 . 
TC T 572 PASS CIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59 GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL 0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246 +demo20 3665 . C . . PASS . GT:GQX:DP:DPF:MIN_DP 0:249:19:0:19 +demo20 3666 . C . . PASS END=4019;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:48:32:1:17 +demo20 4020 . C . . PASS END=4059;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:24:12:1:9 +demo20 4060 . C . . PASS END=4072;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:15:6:0:6 +demo20 4073 . C . . LowGQX END=4091;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:7:4:0:3 +demo20 4092 . T . . LowGQX;LowDepth END=4100;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP 0/0:3:1:0:1 +demo20 4101 . T . . LowGQX END=5000;BLOCKAVG_min30p3a GT:GQX:DP:DPF:MIN_DP .:.:0:0:0 diff -r 000000000000 -r 1fbe84e8a740 test-data/hg98.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg98.fa Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,85 @@ +>demo20 +TGCCTGCTTTGTGCCAGGTTCTGGGTTGGGAGGTGCTGGGGACAGGGAGATGAGTCAGAC +CTgggaagatttcgtagaggaggtgacagtaagctggaacctgtgtaatgagcaggagtt +gcccagtggagaaggggaaggtgttccaggcggaagaaacagcatatgcaaaggccccaa +ggtaggaagggccctagtgtgtgcagaggacagggcatggggaggggaactaaggctgag +gccaaggagaggaaatgactcacaccgtgagagaggagttgagaccagggaggCTGCTTG +CTGTATGATGCAACTGAGAGGGTAGAACAAGGCTGGCACAGAGAAGGTGGGGAAGGAAAA +GGAGAGACGAAGCTGAGATTTCAGCAGGGCCAAGTCAGCCGTGAGTGCCAGGCTGCGGAG +CCCAGATTCTCTGGGCTGagaaagagcactctgtccagagtgtggaggggggcctggagg +ggatgagactcaaagctgggaggccagagaggaggctgctagagttttctgggagagagt +tactggggcctgaacTCCAGTGAGGCActtcccatttcacagaccaggaaactgaggccc +aagagtgaggcaactggcccaagggcacacagccaggtaaggcagaacCTTCCTTCTTTG +GAGCTCCCTTGGGTGGGAAGCTGTGGGCTCCCCTTCATAGCCCACCCTTTTGGCTGTATC +TCCCTGCTGCCCTGGGCATATGCTCCCTTATTCTGTCCTCCCTTGAAGCTGACTGCTGGC +CTTAAAGGGCCCCTGTTTCTTCCCTCTGGACTACATGAGATCGGGATAGTATTAATGACT +AAAACCTACCAGGGGTTTCTAGGCCTGGCCTGGAAAAAGTGACTGTTGACAAACAAAGTG +CAGAGATTTAAAATCTCCTCTGTCTCAATTAGTGGAATCCAGTTAGAGGTTTGAACTATG +ATTCTACCAGAATCCAATCTCTCTGGGTAGCCAGGTACCCAGGATGGGGCTAAAATTCCA 
+GATGGATAGGTTGTCAACACCAGTGAGGAACCAGGAGGCTACCACAGGGTGGGACTTCCT +GGTTTGGCTTTACATCTGAACTTCAGGGATCCCAGATCCTGGATCTGGGGCACTTGTCCA +GAGAAGGCTATTGCTCTCATGTCACAAATGAGATGACTAAGACCCCCAAATCAATTCCAG +TTCACTCACAAGCATTTCCTGGGCAGTGGAGACCCCTGCCCCACCTGTTGGCACCCCCTC +AGCTCCCCACAGGGAATTGGAGTCCAGCCAAGCATGAGGAGGCTGTTGGCCTCAAGGTGA +GCAGGGATGGGCTGAACCTCACCCAGTAAGGCAAGGACAGAGCCAGGGTTGGCCTGAGAT +TTCCAGCCACCCTTTCCAAGGCTCTGCTCACTGTTATTTTCCTTAGTCTacaacaatatc +aataacaataacaacaataatatcaacaCAAAAAGTGAAATACTCACCATGTATTGTAGT +GTTTCCAAGGTGTCATGTAATGCCAGGGGTAGTTTGGGGCCAGGAAAAATATTTTTGGGA +GGCATAAGAATAGGATGGACTGATATTGATATGCAACAGTTTGATCTGGTCCTCCTCTGA +ATATCTGGGCTGGTAATTTGTACCAGTTTCCCTCGCTTTtgtgcataggcactgtgctga +acccttttgtatgcatgaactcatccgattctctgtgcaagaactctatgagattattat +tcccgttttacaagtaagaaaaattgaggctctgagaagttaaataaatgacttgtatga +agttccagtgctaattaataagtgaaggagccagggcttgaactccggcccatctgactg +caaagccagtgcccttcctcctacacATCTTCCTTTGGATTTCCACCACTGAGCATATGT +AAGGTTGGGCAAACAGCCTGCATGAACAATCGCTGCTTTTATATCATGCACAAGTTTGGT +CTTTTCTGCCTGTGCCCATGTCCTTGTAACCTTCTGAACCAAACTCCCCAGTGCCTGGGA +ACATCAGAAGACTTGACTCTCTTCTCCTTTCACTAGCCTCCACCTGACTGGGACAAAGCC +ATGCAGAGAGCTAGTGCTCCCTTCCTGCTAGACTTCAAGGATGCCTGGTTCCTGTGCCCC +ATCTCCATCCAGCCCTCTCTTCTACACCTGGTGACTGAGCCTCTCCTTCAGTTTCTCCAT +CCAGAAGGGGGTGAAAGCAACTGCCTAGTGTCCTTCCCTGGTGATAGTGGAGCACGGGGG +ACAGGGTGTTTGGGCAAAAGGTGCCCAAGGTGAGGTGCCCAACACAACCTCCTACTCAGA +CGATTGAGCAGACATTCAGCCTCATCTGGGGACTGGGTTACCAGTGGGTTAGTGGGAGGC +ATTGGGCCCAGGCCCTGTGCCTTGGGCTGAGCTACAAGAAACCCACACATGGGATGAATT +CAGGCAGCTCAAGGCCAGGTCTGTGCATACGCCAGTAATAGGTTCAGGTTAATCCACATG +TCGCGATTTGGAAGGTGTCTACTTTTCCTACCTGTAGCTTCCTTAGGCCTCAAACCCCTA +CTCAGCTGGGTCTGCCAGACTGAGATGGAGCCAGGGTGGAATCTTCTGCCCTCAAATCCC +TGTCAGCCCTGGTGGTGCCGGGAGCGCCATCACTATTGGGTCTTAAAGGCTTTCCAGCCT +TCCACTATGGATCCAGGAGCAGCAGTAGCCCCTTTGGTCTTTCTCTCTCATCAGGACATC +TCCACTCATGGTTCCAGTCAGAGCTTCTTGAAAGTAGTCCCACTCTGTTCAAAAGCCTCC +CATGCCCCCTGCTAGCCTCAGGCTAAGAGCCCTTCTCCTTCGCACAGCCTTTGGACCTGT +CTATTTTTATGGTCTGGAAACTTCAGGAACACTGATAGCTGAGCATCTGGCACATATTAT 
+GCACTCAAAAACCATGTATTTCTTTCTCCTTCCCTTTGGGACCCGTAAACCAGGGactgg +acatttttgcaagagacaggagctgtgactgtgcattcactgctgtatccccagcaccca +gcactgggcctgccacacagtaagtgcttagtaaatgtttgttgactgagtgaTTGCAGC +TGGGGCCAAGAATGCCTTGGACACCCCAAGTAGGCCGTGTTAGAAGGAGTCAGTGAGAGC +CTGGGAGCCCAGCCCAGAATTGTTTTCTTGACCCAGAAGCCAGGGCCAGGGATGCCTCTT +CACTTCTGTTTGGCCCTCTTGGGCTTAGGGGCAGGGGCATTAAGATGAgagaggtccttg +gggtgcattgagtctaacctcccagttcctcccattctacagccaggaaaactgaggccc +agggaggggtaggacaagcccaagAAAGTGGGGCTGGAGAATGAATCCTGGAGACCAAAC +TTGTCAGTCTGGATTGCTGTTGCCCTCATCCTCGCCTCCAAAACCCatgggtaaactgag +gccaagagaggggcaggggcatgcccaaggtcacccatggaatcaggggacagggcctgg +attgggattgttgttgacgccattattactgtttattgttgtttctatttcacAGATGGT +CGGGGAGGGTGGGGCCCGCAATGGCTCCCAGGCGCCCAGAGACCCTGGAGGGTGAGCAGG +GTCTCCCCTCCCCTCTCCTGCCCGTCTTTAGCCACACTGGGGCGCACACCGCTCACTCAC +CCGGGGCCGAGGCGTTAGCCCTTTCTTGCACCAGGTGCCGCAACAACACCAGCAGCTGGC +GCAGGCTGTGCTGCTGGTCCTGCAGGAGGCTGGAGTTGTGCCTGACACCGCGCAGGCCGC +GCTCGATGTTGGTGAGGGCGGCGCTCTGGCGGCTCAGCGTGTTCAGCAGCTTCGCCTTCT +TGCTGAGGATGCTGGCCAGCTCCTCCTGCTGCTTGGTCTCCAGGGCCTGCAACCGCTTCT +CGAGCGCGCTGCGGGGTAGGGGGCGCACAGAGGTGAGCCTGGCATCCTCGCGAAGCACGC +ACCCCCGCGCGCCTCCCCGGCCCTGGAGTCCCTGCAGCCCGACGATGAGACTCAAGTGTG +GTGGAACGTCCTGTGCCCACTGTAGGCACAGATTGAGGAGGGGAGAAAAGAGATACCCGG +CCCTGGAGTAATATAGATTGAGGTTTAGTGGAAGAAAGAGGTGGTGTGGGAGGGACACCA +GCAACTGGGTAGCTATTATCAAATCCCAACTGTGCTTGCTTTTTGACCCAGCAGTCTACT +CTCAGGAGTTATCCTCTAGGGAGCATGGTCAAGAATGGCCCTGGGGACTTGCTTGTAATA +GAAAAACAAACTAACCAACCAACCAAAAAAGATATAGCCTAGATGCCCAAAAGCCAGGAC +TGGCTGAATCCGTTGCGTTTTGGCGTCCCTGGAAATGTTCCGCAGTCATGAAGGAGGAGG +CATTTGCACAGAATTGGAAAGATGCCCAGGACTTGGGGCACATCAAGCCTAACCCCATGT +GTGGCAAGAGAAGAAAGAAAGTATTAATGTAAATAAAGAGAAATGGGGTGAACATATAGG +AGAAGGCTGGAAAGACCGCAGTGGTGCCTGTGTTTGGGAAGAATATGAAAGAAATTCCCT +CAAGTGCTGTGACTTCTGCAGAGCAGGTTTGAGTGGATGAAGATGGAGAGGAGGAAGATG +GGGGCAGGATGGAGGGCCCAACTTTCACTTTTATTTTGTACAGGTTCCTGTTGTCCGATG +ATATTATAATAATCAAGAGACATTTTTTGTAATGGATTTAGAAGCAAAGAGGAGTTTTTC +AAAAGAAAGCCTTAGACTCAGCTCTTTCTTTTTGGACATTTTATCCTCCAGATTTACTca 
+catgtgtgtgaaatgagatatggaaatgttactcatcgtatcactggttggattagtaaa +aggctggaagcaacctcaatatccattaactggggactggaggaataaaagcagggacca +catatggtggagcattataa diff -r 000000000000 -r 1fbe84e8a740 test-data/hg98.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg98.fa.fai Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,1 @@ +demo20 5000 8 60 61 diff -r 000000000000 -r 1fbe84e8a740 test-data/indels_test2.vcf.gz Binary file test-data/indels_test2.vcf.gz has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/sample1.bam Binary file test-data/sample1.bam has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/sample1.cram Binary file test-data/sample1.cram has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/sample2.bam Binary file test-data/sample2.bam has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/sample2.cram Binary file test-data/sample2.cram has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/sample3.bam Binary file test-data/sample3.bam has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/snvs_test2.vcf.gz Binary file test-data/snvs_test2.vcf.gz has changed diff -r 000000000000 -r 1fbe84e8a740 test-data/test_fasta_indexes.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_fasta_indexes.loc Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,1 @@ +hg19 hg19 hg19 ${__HERE__}/hg98.fa \ No newline at end of file diff -r 000000000000 -r 1fbe84e8a740 test-data/variants_test2.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/variants_test2.vcf Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,62 @@ +##fileformat=VCFv4.1 +##fileDate=. +##source=strelka +##source_version=2.9.10 +##startTime=. 
+##cmdline=./configureStrelkaGermlineWorkflow.py --bam input_sample_0.cram --bam input_sample_1.cram --disableSequenceErrorEstimation --config=/tmp/tmpmxn8erma/job_working_directory/000/4/configs/tmpx1j1a_0u --referenceFasta input_ref.fasta --runDir results +##reference=file:///tmp/tmpmxn8erma/job_working_directory/000/4/working/input_ref.fasta +##contig= +##content=strelka germline small-variant calls +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##Depth_demo20=53.00 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12891 NA12892 +demo20 991 . C G 38 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:71:9:9:1:5,4:1,4:4,0:2.8:PASS:72,0,123 0/0:33:33:12:0:12,0:9,0:3,0:0.0:PASS:0,36,258 +demo20 1148 . C CTAT 72 PASS CIGAR=1M3I;RU=TAT;REFREP=1;IDREP=2;MQ=60 GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL 0/1:114:27:20:11,8:5,3:6,5:PASS:111,0,147 0/0:69:69:28:24,0:12,0:12,0:PASS:0,72,443 +demo20 1271 . A G 134 PASS SNVHPOL=4;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:126:30:18:0:8,10:7,6:1,4:-18.6:PASS:169,0,123 0/0:75:75:26:0:26,0:18,0:8,0:0.0:PASS:0,78,370 +demo20 1508 . A G 156 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:172:30:22:1:10,12:4,6:6,6:-21.5:PASS:191,0,169 0/0:108:108:37:2:37,0:19,0:18,0:0.0:PASS:0,111,370 +demo20 1706 . C T 304 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:54:30:19:0:0,19:0,8:0,11:-35.5:PASS:342,57,0 0/0:90:90:31:2:31,0:7,0:24,0:0.0:PASS:0,93,370 +demo20 1744 . C T 156 PASS SNVHPOL=3;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:159:30:21:0:9,12:5,6:4,6:-20.7:PASS:191,0,156 0/0:78:78:27:0:27,0:6,0:21,0:0.0:PASS:0,81,370 +demo20 1846 . 
C T 83 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:116:30:24:1:16,8:13,5:3,3:-12.4:PASS:117,0,224 0/0:60:60:21:0:21,0:14,0:7,0:0.0:PASS:0,63,370 +demo20 1873 . C T 122 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/0:60:60:21:0:21,0:15,0:6,0:0.0:PASS:0,63,360 0/1:155:30:23:0:13,10:8,7:5,3:-14.9:PASS:157,0,195 +demo20 2074 . T C 123 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:156:30:24:1:13,11:4,8:9,3:-9.7:PASS:158,0,191 0/0:75:75:26:0:26,0:14,0:12,0:0.0:PASS:0,78,370 +demo20 2199 . G A 149 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:181:30:28:1:14,14:12,5:2,9:-14.3:PASS:183,0,189 0/0:96:96:33:0:33,0:17,0:16,0:0.0:PASS:0,99,370 +demo20 2301 . G T 184 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:161:22:29:1:12,17:6,11:6,6:-21.0:PASS:219,0,158 0/0:75:75:26:1:26,0:15,0:11,0:0.0:PASS:0,78,370 +demo20 2455 . T C 445 PASS SNVHPOL=2;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:90:30:31:2:0,31:0,14:0,17:-51.4:PASS:370,93,0 0/0:78:78:27:1:27,0:11,0:16,0:0.0:PASS:0,81,370 +demo20 2512 . A G 266 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:151:22:39:1:13,26:9,11:4,15:-28.4:PASS:300,0,148 0/0:69:69:24:2:24,0:8,0:16,0:0.0:PASS:0,72,370 +demo20 2640 . C T 375 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:81:30:28:0:0,28:0,14:0,14:-47.3:PASS:370,84,0 0/0:102:102:35:0:35,0:17,0:18,0:0.0:PASS:0,105,370 +demo20 2660 . G T 283 PASS SNVHPOL=3;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:60:30:21:1:0,21:0,11:0,10:-36.2:PASS:321,63,0 0/0:87:87:30:0:30,0:15,0:15,0:0.0:PASS:0,90,370 +demo20 3054 . G C 107 PASS SNVHPOL=2;MQ=58 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:140:22:20:0:10,10:6,6:4,4:-12.8:PASS:142,0,153 0/0:24:24:9:2:9,0:4,0:5,0:0.0:PASS:0,27,201 +demo20 3366 . G T 377 PASS SNVHPOL=4;MQ=60 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 1/1:75:30:26:0:0,26:0,15:0,11:-42.1:PASS:370,78,0 0/0:75:75:26:0:26,0:13,0:13,0:0.0:PASS:0,78,370 +demo20 3537 . 
C T 95 PASS SNVHPOL=2;MQ=59 GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL 0/1:128:22:31:1:21,10:8,6:13,4:-11.3:PASS:130,0,256 0/0:84:84:29:1:29,0:10,0:19,0:0.0:PASS:0,87,370 +demo20 3664 . TC T 286 PASS CIGAR=1M1D;RU=C;REFREP=4;IDREP=3;MQ=59 GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL 0/1:249:27:41:18,20:10,10:8,10:PASS:322,0,246 0/0:70:70:25:25,0:10,0:15,0:PASS:0,73,493 diff -r 000000000000 -r 1fbe84e8a740 tool-data/fasta_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. 
For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa \ No newline at end of file diff -r 000000000000 -r 1fbe84e8a740 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + +
+
\ No newline at end of file diff -r 000000000000 -r 1fbe84e8a740 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Jan 27 14:48:23 2021 +0000 @@ -0,0 +1,7 @@ + + + + value, dbkey, name, path + +
+
\ No newline at end of file