Repository 'ctat_mutations'
hg clone https://toolshed.g2.bx.psu.edu/repos/trinity_ctat/ctat_mutations

Changeset 2:c15d9049ab81 (2018-11-14)
Previous changeset 1:be2f3ce8562b (2018-11-14) Next changeset 3:b591b35283e4 (2018-11-14)
Commit message:
Uploaded
added:
ctat_mutations-7cf8f5889a4d/ctat_mutations.xml
ctat_mutations-7cf8f5889a4d/test-data/reads_1.fastq.gz
ctat_mutations-7cf8f5889a4d/test-data/reads_2.fastq.gz
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/annotated_min_filtered.vcf.gz
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.tab
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.vcf
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bai
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bam
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/mutation_inspector.json
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf.idx
ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz
ctat_mutations-7cf8f5889a4d/tool-data/cravat_tissues.loc.sample
ctat_mutations-7cf8f5889a4d/tool-data/ctat_genome_resource_libs.loc.sample
ctat_mutations-7cf8f5889a4d/tool_data_table_conf.xml.sample
removed:
ctat_mutations.xml
tool_data_table_conf.xml.sample
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/ctat_mutations.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/ctat_mutations.xml Wed Nov 14 12:20:23 2018 -0500
[
@@ -0,0 +1,68 @@
+<tool id="ctat_mutations" name="ctat_mutations" version="1.0.0" profile="17.05">
+    <!-- Short human-readable summary of what the tool does. -->
+    <description>Mutation Pipeline for calling SNPs and variants</description>
+    <!-- Software dependency: the ctat-mutations package, pinned to version 2.0.1. -->
+    <requirements>
+        <requirement type="package" version="2.0.1">ctat-mutations</requirement>
+    </requirements>
+    <!--
+        Runs the ctat_mutations pipeline on a pair of read files and writes all
+        results under varcalling.outdir, which is where the outputs section
+        collects its files from. Variant calling and variant filtering are both
+        fixed to GATK mode.
+        The thread count follows the slots granted by the Galaxy job runner
+        (GALAXY_SLOTS) and falls back to 8 when that variable is unset; the
+        backslash keeps Cheetah from treating the shell expansion as a template
+        variable.
+    -->
+    <command detect_errors="default">
+      <![CDATA[
+            ctat_mutations \
+                   --plot \
+                   --out_dir varcalling.outdir \
+                   --threads "\${GALAXY_SLOTS:-8}" \
+                   --variant_filtering_mode GATK \
+                   --left "$left" \
+                   --right "$right" \
+                   --genome_lib_dir "${genome_resource_lib.fields.path}" \
+                   --variant_call_mode GATK \
+                   --tissue_type "$tissue_type" \
+                   --email "$cravat_email"
+      ]]>
+    </command>
+    <inputs>
+      <!-- Paired read files handed to the left/right read options of the command. -->
+      <param format="fastq" name="left" type="data" label="Left/Forward strand reads" help="Left read"/>
+      <param format="fastq" name="right" type="data" label="Right/Reverse strand reads" help="Right read"/>
+      <!-- Choices are read from the ctat_cravat_tissues data table, sorted on column 1. -->
+      <param name="tissue_type" type="select" label="Select a pathology" help="If you don't know, just choose 'General Purpose'">
+        <options from_data_table="ctat_cravat_tissues">
+          <filter type="sort_by" column="1"/>
+          <validator type="no_options" message="No CRAVAT tissue types are available; the ctat_cravat_tissues data table is empty"/>
+        </options>
+      </param>
+      <!-- Choices are read from the ctat_genome_resource_libs data table; the command uses the entry's path field. -->
+      <param name="genome_resource_lib" type="select" label="Select a reference genome">
+        <options from_data_table="ctat_genome_resource_libs">
+          <filter type="sort_by" column="2" />
+          <validator type="no_options" message="No CTAT genome resource libraries are available; the ctat_genome_resource_libs data table is empty" />
+        </options>
+      </param>
+      <!--
+          Galaxy's default text sanitizer rewrites '@' as '__at__', which would
+          corrupt the e-mail address before it reaches the command line, so '@'
+          is added to the set of characters passed through unchanged.
+      -->
+      <param name="cravat_email" type="text" label="CRAVAT email" help="CRAVAT registered email id">
+        <sanitizer>
+          <valid initial="default">
+            <add value="@"/>
+          </valid>
+        </sanitizer>
+      </param>
+      <!-- Empty section whose title carries the GATK4 licensing notice; it defines no parameters. -->
+      <section name="adv" title="This service uses the GATK4. GATK4 is licensed by the Broad Institute and is made available to academic users of this service for non-commercial use only. The full text of the license is available here: https://www.broadinstitute.org/gatk/about/license.html. For more information about GATK and full documentation, please visit the GATK website: https://www.broadinstitute.org." expanded="False">
+      </section>
+    </inputs>
+    <outputs>
+      <!-- Every dataset below is collected from the varcalling.outdir directory written by the command. -->
+      <data format="tabular" name="cancertab" label="${tool.name} on ${on_string}: Cancer Tab" from_work_dir="varcalling.outdir/cancer.tab"/>
+      <data format="vcf" name="cancerVariants" label="${tool.name} on ${on_string}: Cancer VCF" from_work_dir="varcalling.outdir/cancer.vcf"/>
+      <data format="vcf" name="allVariants" label="${tool.name} on ${on_string}: All Variants VCF" from_work_dir="varcalling.outdir/variants.vcf"/>
+      <!-- Labels of the next two outputs carry the same tool/input prefix as the ones above so history items stay distinguishable. -->
+      <data format="bam" name="bamfile" label="${tool.name} on ${on_string}: Bam used in haplotype calling" from_work_dir="varcalling.outdir/misc/recalibrated.bam"/>
+      <!-- NOTE(review): the collected file has a .vcf.gz name but is declared as txt; confirm whether a compressed VCF datatype should be declared instead. -->
+      <data format="txt" name="cravat" label="${tool.name} on ${on_string}: Annotated (lightly filtered) VCF file" from_work_dir="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz"/>
+    </outputs>
+    <tests>
+        <!--
+            Runs the pipeline on the bundled paired reads with the "Other" tissue type.
+            The golden VCF files embed machine-specific absolute paths in their
+            GATKCommandLine header lines, so a line-by-line diff cannot succeed on
+            another installation; those outputs are checked by approximate size plus
+            assertions on content that does not depend on the host. The BAM and the
+            gzip-compressed VCF are binary and are compared by size only.
+            NOTE(review): genome_resource_lib and cravat_email are not set here, so the
+            test relies on the defaults of those parameters; confirm that is intended.
+        -->
+        <test>
+           <param name="left" value="reads_1.fastq.gz"/>
+           <param name="right" value="reads_2.fastq.gz"/>
+           <param name="tissue_type" value="Other"/>
+           <output name="cancertab" file="varcalling.outdir/cancer.tab" />
+           <output name="cancerVariants" file="varcalling.outdir/cancer.vcf" compare="sim_size">
+               <assert_contents>
+                   <has_text text="##fileformat=VCFv4.2"/>
+                   <has_line_matching expression="chr17\t43093454\t.*"/>
+               </assert_contents>
+           </output>
+           <output name="allVariants" file="varcalling.outdir/variants.vcf" compare="sim_size">
+               <assert_contents>
+                   <has_text text="##fileformat=VCFv4.2"/>
+                   <has_line_matching expression="chr17\t7673767\t.*"/>
+               </assert_contents>
+           </output>
+           <output name="bamfile" file="varcalling.outdir/misc/recalibrated.bam" compare="sim_size" />
+           <output name="cravat" file="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz" compare="sim_size"/>
+        </test>
+    </tests>
+    <!--
+        Help text in reStructuredText. It is wrapped in CDATA so that markup
+        characters added later need no XML escaping, and it uses the info style
+        because the paragraph is a description rather than a warning.
+    -->
+    <help><![CDATA[
+
+.. class:: infomark
+
+Mutation detection in RNA-Seq highlights the GATK Best Practices in RNA-Seq variant calling, several sources of variant annotation, and filtering based on CRAVAT.
+
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/reads_1.fastq.gz
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/reads_1.fastq.gz has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/reads_2.fastq.gz
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/reads_2.fastq.gz has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/annotated_min_filtered.vcf.gz
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/annotated_min_filtered.vcf.gz has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.tab Wed Nov 14 12:20:23 2018 -0500
b
@@ -0,0 +1,14 @@
+CHROM POS REF ALT GENE DP QUAL MQ SAO NSF NSM NSN TUMOR TISSUE COSMIC_ID KGPROD RS PMC CHASM_PVALUE CHASM_FDR VEST_PVALUE VEST_FDR
+chr5 474989 A G LOC100288152,SLC9A3 4 96.03 60 NA NA NA NA carcinoma_--_NS urinary_tract COSM4006021 NA NA NA 0.1114 0.2 0.96802 1
+chr5 181224474 G A TRIM41 45 349.77 60 NA NA NA NA NA NA NA NA NA NA 0.0694 0.15 0.48052 1
+chr8 143923759 G A PLEC 66 838.77 60 NA NA NA NA carcinoma_--_adenocarcinoma large_intestine COSM3750086 NA NA NA 0.0344 0.1 0.84202 1
+chr12 56420869 G A TIMELESS 48 384.77 60 NA NA NA NA carcinoma_--_adenocarcinoma large_intestine COSM3753397 NA NA NA 0.0744 0.15 0.18439 0.95
+chr17 7673767 C T TP53 61 1848.77 60 NA NA NA NA Ewings_sarcoma-peripheral_primitive_neuroectodermal_tumour_--_NS bone COSM3717625 NA NA NA 0 0.05 0.01447 0.15
+chr17 7676154 G C TP53 80 2161.77 60 NA NA NA NA haematopoietic_neoplasm_--_acute_myeloid_leukaemia haematopoietic_and_lymphoid_tissue COSM3766193 NA NA NA 0.087 0.15 0.52717 1
+chr17 43071077 T C BRCA1 4 92.03 60 NA NA NA NA haematopoietic_neoplasm_--_acute_myeloid_leukaemia haematopoietic_and_lymphoid_tissue COSM3755560 NA NA NA 0.0372 0.1 0.3446 1
+chr17 43091983 T C BRCA1 4 84.03 60 NA NA NA NA haemangioblastoma_--_NS soft_tissue COSM3755561 NA NA NA 0.0002 0.05 0.64447 1
+chr17 43092919 G A BRCA1 2 33.74 60 NA NA NA NA carcinoma_--_NS prostate COSM3755564 NA NA NA 0.0004 0.05 0.33539 1
+chr17 43093454 C T BRCA1 11 425.77 60 NA NA NA NA rhabdomyosarcoma_--_embryonal soft_tissue COSM4989394 NA NA NA 0.0014 0.05 0.51068 1
+chr19 39177761 G C PAK4 106 1134.77 60 NA NA NA NA NA NA NA NA NA NA 0.0004 0.05 0.01093 0.15
+chr19 47271515 T C CCDC9 12 336.77 60 NA NA NA NA haematopoietic_neoplasm_--_acute_myeloid_leukaemia haematopoietic_and_lymphoid_tissue COSM3721172 NA NA NA 0.093 0.15 0.97622 1
+chr20 46687147 C T TP53RK 26 423.77 60 NA NA NA NA carcinoma_--_ductal_carcinoma pancreas COSM3758608 NA NA NA 0.0834 0.15 0.88584 1
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/cancer.vcf Wed Nov 14 12:20:23 2018 -0500
b
b'@@ -0,0 +1,87 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=PASS,Description="All filters passed">\n+##FILTER=<ID=FS,Description="FS > 30.0">\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FILTER=<ID=QD,Description="QD < 2.0">\n+##FILTER=<ID=SnpCluster,Description="SNPs found in clusters">\n+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=HaplotypeCaller,CommandLine="HaplotypeCaller  --recover-dangling-heads true --dont-use-soft-clipped-bases true --standard-min-confidence-threshold-for-calling 20.0 --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants.vcf --input /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam --reference /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ref_genome.fa  --emit-ref-confidence NONE --gvcf-gq-bands 1 --gvcf-gq-bands 2 --gvcf-gq-bands 3 --gvcf-gq-bands 4 --gvcf-gq-bands 5 --gvcf-gq-bands 6 --gvcf-gq-bands 7 --gvcf-gq-bands 8 --gvcf-gq-bands 9 --gvcf-gq-bands 10 --gvcf-gq-bands 11 --gvcf-gq-bands 12 --gvcf-gq-bands 13 --gvcf-gq-bands 14 --gvcf-gq-bands 15 --gvcf-gq-bands 16 --gvcf-gq-bands 17 --gvcf-gq-bands 18 --gvcf-gq-bands 19 --gvcf-gq-bands 20 --gvcf-gq-bands 21 --gvcf-gq-bands 22 --gvcf-gq-bands 23 --gvcf-gq-bands 24 --gvcf-gq-bands 25 --gvcf-gq-bands 26 --gvcf-gq-bands 27 --gvcf-gq-bands 28 --gvcf-gq-bands 29 --gvcf-gq-bands 30 --gvcf-gq-bands 31 --gvcf-gq-bands 32 --gvcf-gq-bands 33 --gvcf-gq-bands 34 --gvcf-gq-bands 35 --gvcf-gq-bands 36 
--gvcf-gq-bands 37 --gvcf-gq-bands 38 --gvcf-gq-bands 39 --gvcf-gq-bands 40 --gvcf-gq-bands 41 --gvcf-gq-bands 42 --gvcf-gq-bands 43 --gvcf-gq-bands 44 --gvcf-gq-bands 45 --gvcf-gq-bands 46 --gvcf-gq-bands 47 --gvcf-gq-bands 48 --gvcf-gq-bands 49 --gvcf-gq-bands 50 --gvcf-gq-bands 51 --gvcf-gq-bands 52 --gvcf-gq-bands 53 --gvcf-gq-bands 54 --gvcf-gq-bands 55 --gvcf-gq-bands 56 --gvcf-gq-bands 57 --gvcf-gq-bands 58 --gvcf-gq-bands 59 --gvcf-gq-bands 60 --gvcf-gq-bands 70 --gvcf-gq-bands 80 --gvcf-gq-bands 90 --gvcf-gq-bands 99 --indel-size-to-eliminate-in-ref-model 10 --use-alleles-trigger false --disable-optimizations false --just-determine-active-regions false --dont-genotype false --max-mnp-distance 0 --dont-trim-active-regions false --max-disc-ar-extension 25 --max-gga-ar-extension 300 --padding-around-indels 150 --padding-around-snps 20 --kmer-size 10 --kmer-size 25 --dont-increase-kmer-sizes-for-cycles false --allow-non-unique-kmers-in-ref false --num-pruning-samples 1 --do-not-recover-dangling-branches false --min-dangling-branch-length 4 --consensus false --max-num-haplotypes-in-population 128 --error-correct-kmers false --min-pruning 2 --debug-graph-transformations false --kmer-length-for-read-error-correction 25 --min-observations-for-kmer-to-be-solid 20 --likelihood-calculation-engine PairHMM --base-quality-score-threshold 18 --pair-hmm-gap-continuation-penalty 10 --pair-hmm-implementation FASTEST_AVAILABLE --pcr-indel-model CONSERVATIVE --phred-scaled-global-read-mismapping-rate 45 --native-pair-hmm-threads 4 --native-pair-hmm-use-double-precision false --debug false --use-filtered-reads-for-annotations false --bam-writer-type CALLED_HAPLOTYPES --capture-assembly-failure-bam false --error-correct-reads false --do-not-run-physical-phasing false --min-base-quality-score 10 --smith-waterman JAVA --use-new-qual-calculator false --annotate-with-num-discovered-alleles false --heterozygosity 0.001 --indel-heterozygosity 1.25E-4 --heterozygosity-stdev 0.01 
--max-alternate-alleles 6 '..b'MLEAF=1;MQ=60;QD=16.87;SOR=2.303;ANN=A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|10/24|c.2612C>T|p.Pro871Leu|2844/7270|2612/5655|871/1884||,A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|9/22|c.2471C>T|p.Pro824Leu|2752/7115|2471/5451|824/1816||,A|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|10/23|c.2612C>T|p.Pro871Leu|2844/7207|2612/5592|871/1863||,A|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007298.3|Coding|9/21|c.787+1825C>T||||||,A|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007299.3|Coding|10/21|c.787+1825C>T||||||,A|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|10/23|n.2748C>T||||||;COSMIC_ID=COSM3755564;TISSUE=prostate;TUMOR=carcinoma_--_NS;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0004;CHASM_FDR=0.05;VEST_PVALUE=0.33539;VEST_FDR=1\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:61,6,0\n+chr17\t43093454\t.\tC\tT\t425.77\tPASS\tGENE=BRCA1;AC=2;AF=1;AN=2;DP=11;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=31.78;SOR=1.27;ANN=T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007300.3|Coding|10/24|c.2077G>A|p.Asp693Asn|2309/7270|2077/5655|693/1884||,T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007297.3|Coding|9/22|c.1936G>A|p.Asp646Asn|2217/7115|1936/5451|646/1816||,T|missense_variant|MODERATE|BRCA1|BRCA1|transcript|NM_007294.3|Coding|10/23|c.2077G>A|p.Asp693Asn|2309/7207|2077/5592|693/1863||,T|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007298.3|Coding|9/21|c.787+1290G>A||||||,T|intron_variant|MODIFIER|BRCA1|BRCA1|transcript|NM_007299.3|Coding|10/21|c.787+1290G>A||||||,T|non_coding_exon_variant|MODIFIER|BRCA1|BRCA1|transcript|NR_027676.1|Noncoding|10/23|n.2213G>A||||||;COSMIC_ID=COSM4989394;TISSUE=soft_tissue;TUMOR=rhabdomyosarcoma_--_embryonal;FATHMM=NEUTRAL;SOMATIC=Reported_in_another_cancer_sample_as_somatic;CHASM_PVALUE=0.0014;CHASM_FDR=0.05;VEST_PVALUE=0.51068;VEST_FDR=1\tGT:AD:DP:GQ:PL\t1/1
:0,11:11:33:454,33,0\n+chr19\t39177761\t.\tG\tC\t1134.77\tPASS\tGENE=PAK4;AC=1;AF=0.5;AN=2;BaseQRankSum=2.258;DP=106;ExcessHet=3.0103;FS=0;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=10.71;ReadPosRankSum=1.397;SOR=0.654;ANN=C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014831.2|Coding|10/11|c.1572G>C|p.Met524Ile|2033/3064|1572/1776|524/591||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014832.1|Coding|8/9|c.1572G>C|p.Met524Ile|1734/2765|1572/1776|524/591||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014834.2|Coding|7/8|c.1113G>C|p.Met371Ile|1275/2306|1113/1317|371/438||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_001014835.1|Coding|8/9|c.1113G>C|p.Met371Ile|1348/2379|1113/1317|371/438||,C|missense_variant|MODERATE|PAK4|PAK4|transcript|NM_005884.3|Coding|9/10|c.1572G>C|p.Met524Ile|1807/2838|1572/1776|524/591||;CHASM_PVALUE=0.0004;CHASM_FDR=0.05;VEST_PVALUE=0.01093;VEST_FDR=0.15\tGT:AD:DP:GQ:PL\t0/1:58,48:106:99:1163,0,1435\n+chr19\t47271515\t.\tT\tC\t336.77\tPASS\tGENE=CCDC9;AC=2;AF=1;AN=2;DP=12;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=60;QD=28.06;SOR=2.67;ANN=C|missense_variant|MODERATE|CCDC9|CCDC9|transcript|NM_015603.2|Coding|12/12|c.1433T>C|p.Leu478Pro|1640/2078|1433/1596|478/531||;COSMIC_ID=COSM3721172;TISSUE=haematopoietic_and_lymphoid_tissue;TUMOR=haematopoietic_neoplasm_--_acute_myeloid_leukaemia;FATHMM=NEUTRAL;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.093;CHASM_FDR=0.15;VEST_PVALUE=0.97622;VEST_FDR=1\tGT:AD:DP:GQ:PL\t1/1:0,12:12:36:365,36,0\n+chr20\t46687147\t.\tC\tT\t423.77\tPASS\tGENE=TP53RK;AC=1;AF=0.5;AN=2;BaseQRankSum=-0.26;DP=26;ExcessHet=3.0103;FS=1.657;MLEAC=1;MLEAF=0.5;MQ=60;MQRankSum=0;QD=16.3;ReadPosRankSum=1.395;SOR=1.179;ANN=T|missense_variant|MODERATE|TP53RK|TP53RK|transcript|NM_033550.3|Coding|2/2|c.368G>A|p.Arg123Gln|591/3373|368/762|123/253||;COSMIC_ID=COSM3758608;TISSUE=pancreas;TUMOR=carcinoma_--_ductal_carcinoma;FATHMM=PATHOGENIC;SOMATIC=Confirmed_somatic_variant;CHASM_PVALUE=0.0834;CHASM_FDR=0.15;V
EST_PVALUE=0.88584;VEST_FDR=1\tGT:AD:DP:GQ:PL\t0/1:8,18:26:99:452,0,165\n'
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bai
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bai has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bam
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/misc/recalibrated.bam has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/mutation_inspector.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/mutation_inspector.json Wed Nov 14 12:20:23 2018 -0500
[
b'@@ -0,0 +1,321 @@\n+{\n+  "BAM": "/broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam", \n+  "BAM_INDEX": "/broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bai", \n+  "BED": "/seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/refGene.sort.bed", \n+  "BED_INDEX": "/seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ctat_mutation_lib/refGene.sort.bed.idx", \n+  "SAMPLE": "varcalling.outdir", \n+  "SNV": [\n+    {\n+      "ALT": "G", \n+      "CHASM_FDR": "0.2", \n+      "CHASM_PVALUE": "0.1114", \n+      "CHROM": "chr5", \n+      "COSMIC_ID": "COSM4006021", \n+      "DP": "4", \n+      "GENE": "LOC100288152,SLC9A3", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "474989", \n+      "QUAL": "96.03", \n+      "REF": "A", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "urinary_tract", \n+      "TUMOR": "carcinoma_--_NS", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.96802"\n+    }, \n+    {\n+      "ALT": "A", \n+      "CHASM_FDR": "0.15", \n+      "CHASM_PVALUE": "0.0694", \n+      "CHROM": "chr5", \n+      "COSMIC_ID": "NA", \n+      "DP": "45", \n+      "GENE": "TRIM41", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "181224474", \n+      "QUAL": "349.77", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "NA", \n+      "TUMOR": "NA", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.48052"\n+    }, \n+    {\n+      "ALT": "A", \n+      "CHASM_FDR": "0.1", \n+      "CHASM_PVALUE": "0.0344", \n+      "CHROM": "chr8", \n+      "COSMIC_ID": "COSM3750086", \n+      "DP": "66", \n+      "GENE": "PLEC", \n+      "KGPROD": "NA", \n+      "MQ": "60", 
\n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "143923759", \n+      "QUAL": "838.77", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "large_intestine", \n+      "TUMOR": "carcinoma_--_adenocarcinoma", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.84202"\n+    }, \n+    {\n+      "ALT": "A", \n+      "CHASM_FDR": "0.15", \n+      "CHASM_PVALUE": "0.0744", \n+      "CHROM": "chr12", \n+      "COSMIC_ID": "COSM3753397", \n+      "DP": "48", \n+      "GENE": "TIMELESS", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "56420869", \n+      "QUAL": "384.77", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "large_intestine", \n+      "TUMOR": "carcinoma_--_adenocarcinoma", \n+      "VEST_FDR": "0.95", \n+      "VEST_PVALUE": "0.18439"\n+    }, \n+    {\n+      "ALT": "T", \n+      "CHASM_FDR": "0.05", \n+      "CHASM_PVALUE": "0", \n+      "CHROM": "chr17", \n+      "COSMIC_ID": "COSM3717625", \n+      "DP": "61", \n+      "GENE": "TP53", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "7673767", \n+      "QUAL": "1848.77", \n+      "REF": "C", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "bone", \n+      "TUMOR": "Ewings_sarcoma-peripheral_primitive_neuroectodermal_tumour_--_NS", \n+      "VEST_FDR": "0.15", \n+      "VEST_PVALUE": "0.01447"\n+    }, \n+    {\n+      "ALT": "C", \n+      "CHASM_FDR": "0.15", \n+      "CHASM_PVALUE": "0.087", \n+      "CHROM": "chr17", \n+      "COSMIC_ID": "COSM3766193", \n+      "DP": "80", \n+      "GENE": "TP53", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "7676154", \n+      
"QUAL": "2161.77", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "haematopoietic_and_lymphoid_tissue", \n+      "TUMOR":'..b'"4", \n+      "GENE": "BRCA1", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "43071077", \n+      "QUAL": "92.03", \n+      "REF": "T", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "haematopoietic_and_lymphoid_tissue", \n+      "TUMOR": "haematopoietic_neoplasm_--_acute_myeloid_leukaemia", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.3446"\n+    }, \n+    {\n+      "ALT": "C", \n+      "CHASM_FDR": "0.05", \n+      "CHASM_PVALUE": "0.0002", \n+      "CHROM": "chr17", \n+      "COSMIC_ID": "COSM3755561", \n+      "DP": "4", \n+      "GENE": "BRCA1", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "43091983", \n+      "QUAL": "84.03", \n+      "REF": "T", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "soft_tissue", \n+      "TUMOR": "haemangioblastoma_--_NS", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.64447"\n+    }, \n+    {\n+      "ALT": "A", \n+      "CHASM_FDR": "0.05", \n+      "CHASM_PVALUE": "0.0004", \n+      "CHROM": "chr17", \n+      "COSMIC_ID": "COSM3755564", \n+      "DP": "2", \n+      "GENE": "BRCA1", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "43092919", \n+      "QUAL": "33.74", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "prostate", \n+      "TUMOR": "carcinoma_--_NS", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.33539"\n+    }, \n+    {\n+      "ALT": "T", \n+      "CHASM_FDR": "0.05", \n+      "CHASM_PVALUE": "0.0014", \n+      "CHROM": "chr17", \n+      "COSMIC_ID": "COSM4989394", 
\n+      "DP": "11", \n+      "GENE": "BRCA1", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "43093454", \n+      "QUAL": "425.77", \n+      "REF": "C", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "soft_tissue", \n+      "TUMOR": "rhabdomyosarcoma_--_embryonal", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.51068"\n+    }, \n+    {\n+      "ALT": "C", \n+      "CHASM_FDR": "0.05", \n+      "CHASM_PVALUE": "0.0004", \n+      "CHROM": "chr19", \n+      "COSMIC_ID": "NA", \n+      "DP": "106", \n+      "GENE": "PAK4", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "39177761", \n+      "QUAL": "1134.77", \n+      "REF": "G", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "NA", \n+      "TUMOR": "NA", \n+      "VEST_FDR": "0.15", \n+      "VEST_PVALUE": "0.01093"\n+    }, \n+    {\n+      "ALT": "C", \n+      "CHASM_FDR": "0.15", \n+      "CHASM_PVALUE": "0.093", \n+      "CHROM": "chr19", \n+      "COSMIC_ID": "COSM3721172", \n+      "DP": "12", \n+      "GENE": "CCDC9", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", \n+      "PMC": "NA", \n+      "POS": "47271515", \n+      "QUAL": "336.77", \n+      "REF": "T", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "haematopoietic_and_lymphoid_tissue", \n+      "TUMOR": "haematopoietic_neoplasm_--_acute_myeloid_leukaemia", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.97622"\n+    }, \n+    {\n+      "ALT": "T", \n+      "CHASM_FDR": "0.15", \n+      "CHASM_PVALUE": "0.0834", \n+      "CHROM": "chr20", \n+      "COSMIC_ID": "COSM3758608", \n+      "DP": "26", \n+      "GENE": "TP53RK", \n+      "KGPROD": "NA", \n+      "MQ": "60", \n+      "NSF": "NA", \n+      "NSM": "NA", \n+      "NSN": "NA", 
\n+      "PMC": "NA", \n+      "POS": "46687147", \n+      "QUAL": "423.77", \n+      "REF": "C", \n+      "RS": "NA", \n+      "SAO": "NA", \n+      "TISSUE": "pancreas", \n+      "TUMOR": "carcinoma_--_ductal_carcinoma", \n+      "VEST_FDR": "1", \n+      "VEST_PVALUE": "0.88584"\n+    }\n+  ]\n+}\n\\ No newline at end of file\n'
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf Wed Nov 14 12:20:23 2018 -0500
b
b'@@ -0,0 +1,79 @@\n+##fileformat=VCFv4.2\n+##FILTER=<ID=LowQual,Description="Low quality">\n+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">\n+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">\n+##GATKCommandLine=<ID=HaplotypeCaller,CommandLine="HaplotypeCaller  --recover-dangling-heads true --dont-use-soft-clipped-bases true --standard-min-confidence-threshold-for-calling 20.0 --output /broad/hptmp/bankapur/full_mut/varcalling.outdir/variants.vcf --input /broad/hptmp/bankapur/full_mut/varcalling.outdir/misc/recalibrated.bam --reference /seq/regev_genome_portal/RESOURCES/CTAT_GENOME_LIB/GRCh38_v27_CTAT_lib_Feb092018/ctat_genome_lib_build_dir/ref_genome.fa  --emit-ref-confidence NONE --gvcf-gq-bands 1 --gvcf-gq-bands 2 --gvcf-gq-bands 3 --gvcf-gq-bands 4 --gvcf-gq-bands 5 --gvcf-gq-bands 6 --gvcf-gq-bands 7 --gvcf-gq-bands 8 --gvcf-gq-bands 9 --gvcf-gq-bands 10 --gvcf-gq-bands 11 --gvcf-gq-bands 12 --gvcf-gq-bands 13 --gvcf-gq-bands 14 --gvcf-gq-bands 15 --gvcf-gq-bands 16 --gvcf-gq-bands 17 --gvcf-gq-bands 18 --gvcf-gq-bands 19 --gvcf-gq-bands 20 --gvcf-gq-bands 21 --gvcf-gq-bands 22 --gvcf-gq-bands 23 --gvcf-gq-bands 24 --gvcf-gq-bands 25 --gvcf-gq-bands 26 --gvcf-gq-bands 27 --gvcf-gq-bands 28 --gvcf-gq-bands 29 --gvcf-gq-bands 30 --gvcf-gq-bands 31 --gvcf-gq-bands 32 --gvcf-gq-bands 33 --gvcf-gq-bands 34 --gvcf-gq-bands 35 --gvcf-gq-bands 36 --gvcf-gq-bands 37 --gvcf-gq-bands 38 --gvcf-gq-bands 39 --gvcf-gq-bands 40 --gvcf-gq-bands 41 --gvcf-gq-bands 42 --gvcf-gq-bands 43 --gvcf-gq-bands 44 --gvcf-gq-bands 45 --gvcf-gq-bands 46 --gvcf-gq-bands 47 
--gvcf-gq-bands 48 --gvcf-gq-bands 49 --gvcf-gq-bands 50 --gvcf-gq-bands 51 --gvcf-gq-bands 52 --gvcf-gq-bands 53 --gvcf-gq-bands 54 --gvcf-gq-bands 55 --gvcf-gq-bands 56 --gvcf-gq-bands 57 --gvcf-gq-bands 58 --gvcf-gq-bands 59 --gvcf-gq-bands 60 --gvcf-gq-bands 70 --gvcf-gq-bands 80 --gvcf-gq-bands 90 --gvcf-gq-bands 99 --indel-size-to-eliminate-in-ref-model 10 --use-alleles-trigger false --disable-optimizations false --just-determine-active-regions false --dont-genotype false --max-mnp-distance 0 --dont-trim-active-regions false --max-disc-ar-extension 25 --max-gga-ar-extension 300 --padding-around-indels 150 --padding-around-snps 20 --kmer-size 10 --kmer-size 25 --dont-increase-kmer-sizes-for-cycles false --allow-non-unique-kmers-in-ref false --num-pruning-samples 1 --do-not-recover-dangling-branches false --min-dangling-branch-length 4 --consensus false --max-num-haplotypes-in-population 128 --error-correct-kmers false --min-pruning 2 --debug-graph-transformations false --kmer-length-for-read-error-correction 25 --min-observations-for-kmer-to-be-solid 20 --likelihood-calculation-engine PairHMM --base-quality-score-threshold 18 --pair-hmm-gap-continuation-penalty 10 --pair-hmm-implementation FASTEST_AVAILABLE --pcr-indel-model CONSERVATIVE --phred-scaled-global-read-mismapping-rate 45 --native-pair-hmm-threads 4 --native-pair-hmm-use-double-precision false --debug false --use-filtered-reads-for-annotations false --bam-writer-type CALLED_HAPLOTYPES --capture-assembly-failure-bam false --error-correct-reads false --do-not-run-physical-phasing false --min-base-quality-score 10 --smith-waterman JAVA --use-new-qual-calculator false --annotate-with-num-discovered-alleles false --heterozygosity 0.001 --indel-heterozygosity 1.25E-4 --heterozygosity-stdev 0.01 --max-alternate-alleles 6 --max-genotype-count 1024 --sample-ploidy 2 --num-reference-samples-if-no-call 0 --genotyping-mode DISCOVERY --genotype-filtered-alleles false --contamination-fraction-to-filter 0.0 
--output-mode EMI'..b'0.323;SOR=1.525\tGT:AD:DP:GQ:PL\t0/1:38,22:60:99:758,0,1811\n+chr8\t143924022\t.\tA\tG\t587.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-2.271;DP=58;ExcessHet=3.0103;FS=14.916;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=10.13;ReadPosRankSum=0.674;SOR=2.948\tGT:AD:DP:GQ:PL\t0/1:40,18:58:99:616,0,1892\n+chr12\t56420869\t.\tG\tA\t384.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.348;DP=48;ExcessHet=3.0103;FS=4.262;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=8.02;ReadPosRankSum=0.096;SOR=0.674\tGT:AD:DP:GQ:PL\t0/1:30,18:48:99:413,0,759\n+chr12\t56420872\t.\tA\tG\t695.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.308;DP=46;ExcessHet=3.0103;FS=8.803;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=15.13;ReadPosRankSum=0.656;SOR=1.232\tGT:AD:DP:GQ:PL\t0/1:18,28:46:99:724,0,415\n+chr12\t56422138\t.\tC\tT\t72.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=1.718;DP=8;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=9.10;ReadPosRankSum=-0.816;SOR=0.693\tGT:AD:DP:GQ:PL\t0/1:4,4:8:96:101,0,96\n+chr17\t7673767\t.\tC\tT\t1848.77\t.\tAC=2;AF=1.00;AN=2;DP=61;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.31;SOR=1.609\tGT:AD:DP:GQ:PL\t1/1:0,61:61:99:1877,183,0\n+chr17\t7675327\t.\tC\tT\t32.74\t.\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=16.37;SOR=0.693\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:60,6,0\n+chr17\t7676154\t.\tG\tC\t2161.77\t.\tAC=2;AF=1.00;AN=2;BaseQRankSum=3.225;DP=80;ExcessHet=3.0103;FS=14.289;MLEAC=2;MLEAF=1.00;MQ=60.00;MQRankSum=0.000;QD=27.02;ReadPosRankSum=-2.023;SOR=0.642\tGT:AD:DP:GQ:PL\t1/1:4,76:80:99:2190,183,0\n+chr17\t43071077\t.\tT\tC\t92.03\t.\tAC=2;AF=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=23.01;SOR=3.258\tGT:AD:DP:GQ:PL\t1/1:0,4:4:12:120,12,0\n+chr17\t43082453\t.\tA\tG\t37.74\t.\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=18.87;SOR=2.303\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:65,6,0\n+chr17\t43091983\t.\tT\tC\t84.03\t.\tAC=2;AF
=1.00;AN=2;DP=4;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=21.01;SOR=0.693\tGT:AD:DP:GQ:PL\t1/1:0,4:4:12:112,12,0\n+chr17\t43092919\t.\tG\tA\t33.74\t.\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=16.87;SOR=2.303\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:61,6,0\n+chr17\t43093220\t.\tA\tG\t278.77\t.\tAC=2;AF=1.00;AN=2;DP=10;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=27.88;SOR=4.804\tGT:AD:DP:GQ:PL\t1/1:0,10:10:30:307,30,0\n+chr17\t43093449\t.\tG\tA\t425.77\t.\tAC=2;AF=1.00;AN=2;DP=9;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.87;SOR=1.402\tGT:AD:DP:GQ:PL\t1/1:0,9:9:33:454,33,0\n+chr17\t43093454\t.\tC\tT\t425.77\t.\tAC=2;AF=1.00;AN=2;DP=11;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=31.78;SOR=1.270\tGT:AD:DP:GQ:PL\t1/1:0,11:11:33:454,33,0\n+chr19\t39177761\t.\tG\tC\t1134.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=2.258;DP=106;ExcessHet=3.0103;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=10.71;ReadPosRankSum=1.397;SOR=0.654\tGT:AD:DP:GQ:PL\t0/1:58,48:106:99:1163,0,1435\n+chr19\t39178960\t.\tA\tATG\t226.77\t.\tAC=2;AF=1.00;AN=2;DP=12;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.40;SOR=4.174\tGT:AD:DP:GQ:PL\t1/1:0,7:7:21:264,21,0\n+chr19\t39179002\t.\tT\tC\t31.74\t.\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=15.87;SOR=2.303\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:59,6,0\n+chr19\t47271315\t.\tC\tT\t513.77\t.\tAC=2;AF=1.00;AN=2;BaseQRankSum=0.932;DP=21;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;MQRankSum=0.000;QD=25.69;ReadPosRankSum=-1.045;SOR=0.264\tGT:AD:DP:GQ:PL\t1/1:1,19:20:30:542,30,0\n+chr19\t47271515\t.\tT\tC\t336.77\t.\tAC=2;AF=1.00;AN=2;DP=12;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=28.06;SOR=2.670\tGT:AD:DP:GQ:PL\t1/1:0,12:12:36:365,36,0\n+chr19\t47272198\t.\tG\tT\t21.77\t.\tAC=2;AF=1.00;AN=2;DP=2;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=10.88;SOR=0.693\tGT:AD:DP:GQ:PL\t1/1:0,2:2:6:49,6,0\n+ch
r20\t46687147\t.\tC\tT\t423.77\t.\tAC=1;AF=0.500;AN=2;BaseQRankSum=-0.260;DP=26;ExcessHet=3.0103;FS=1.657;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.000;QD=16.30;ReadPosRankSum=1.395;SOR=1.179\tGT:AD:DP:GQ:PL\t0/1:8,18:26:99:452,0,165\n'
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf.idx
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants.vcf.idx has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz
b
Binary file ctat_mutations-7cf8f5889a4d/test-data/varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz has changed
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/tool-data/cravat_tissues.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/tool-data/cravat_tissues.loc.sample Wed Nov 14 12:20:23 2018 -0500
b
@@ -0,0 +1,27 @@
+Bladder   Bladder Urothelial Carcinoma  BLCA (TCGA)   Jun 2013
+Blood-Lymphocyte  Chronic Lymphocytic Leukemia  CLL (ICGC)  Mar 2013
+Blood-Myeloid   Acute Myeloid Leukemia  LAML (TCGA)   Jun 2013
+Brain-Cerebellum  Medulloblastoma   MB (mixed source)   Dec 2010
+Brain-Glioblastoma-Multiforme   Glioblastoma Multiforme   GBM (TCGA)  Jun 2013
+Brain-Lower-Grade-Glioma  Brain Lower Grade Glioma  LGG (TCGA)  Jun 2013
+Breast  Breast Invasive Carcinoma BRCA (TCGA) Jun 2012
+Cervix  Cervical Squamous Cell Carcinoma and Endocervical Adenocarcinoma  CESC (TCGA) Jun 2013
+Colon Colon Adenocarcinoma  COAD (TCGA) Jun 2013
+Head and Neck Head and Neck Squamous Cell Carcinoma HNSC (TCGA) Jun 2013
+Kidney-Chromophobe  Kidney Chromophobe  KICH (TCGA) Jun 2013
+Kidney-Clear-Cell Kidney Renal Clear Cell Carcinoma KIRC (TCGA) Jun 2013
+Kidney-Papillary-Cell Kidney Renal Papillary Cell Carcinoma KIRP (TCGA) Jun 2013
+Liver-Nonviral  Hepatocellular Carcinoma (Secondary to Alcohol and Adiposity) HCCA (ICGC) Mar 2013
+Liver-Viral Hepatocellular Carcinoma (Viral)  HCCV (ICGC) Mar 2013
+Lung-Adenocarcinoma Lung Adenocarcinoma LUAD (TCGA) Jun 2013
+Lung-Squamous Cell  Lung Squamous Cell Carcinoma  LUSC (TCGA) Jun 2013
+Melanoma  Melanoma  ML (Yardena Samuels lab)  Dec 2011
+Other  General purpose  OV (TCGA) Jun 2013
+Ovary Ovarian Serous Cystadenocarcinoma OV (TCGA) Jun 2013
+Pancreas  Pancreatic Cancer PNCC (ICGC)  Mar 2013
+Prostate-Adenocarcinoma Prostate Adenocarcinoma PRAD (TCGA) Jun 2013
+Rectum  Rectum Adenocarcinoma READ (TCGA) Jun 2013
+Skin  Skin Cutaneous Melanoma SKCM (TCGA) Jun 2013
+Stomach Stomach Adenocarcinoma  STAD (TCGA) Jun 2013
+Thyroid Thyroid Carcinoma THCA (TCGA) Jun 2013
+Uterus  Uterine Corpus Endometrioid Carcinoma  UCEC (TCGA) Jun 2013
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/tool-data/ctat_genome_resource_libs.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/tool-data/ctat_genome_resource_libs.loc.sample Wed Nov 14 12:20:23 2018 -0500
b
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Genome Resource Libraries
+# Usually there will only be one library, but it is conceivable
+# that there could be multiple libraries.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the genome resource lib files are stored
+#
+#ctat_genome_resource_libs.loc could look like:
+#
+#GRCh38_v27_CTAT_lib_Feb092018 CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018 /path/to/ctat/genome/resource/lib/directory
+#
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations-7cf8f5889a4d/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ctat_mutations-7cf8f5889a4d/tool_data_table_conf.xml.sample Wed Nov 14 12:20:23 2018 -0500
b
@@ -0,0 +1,18 @@
+<tables>
+    <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_resource_libs.loc" />
+    </table>
+    <table name="cravat_tissues" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/cravat_tissues.loc" />
+    </table>
+    <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_centrifuge_indexes.loc" />
+    </table>
+    <table name="ctat_lncrna_annotations" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_lncrna_annotations.loc" />
+    </table>
+</tables>
b
diff -r be2f3ce8562b -r c15d9049ab81 ctat_mutations.xml
--- a/ctat_mutations.xml Wed Nov 14 12:15:58 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,68 +0,0 @@
-<tool id="ctat_mutations" name="ctat_mutations" version="1.0.0" profile="17.05">
-    <description>Mutation Pipeline for calling SNPs and variants</description>
-    <requirements>
-        <requirement type="package" version="2.0.1">ctat-mutations</requirement>
-    </requirements>
-    <command detect_errors="default">
-      <![CDATA[
-            ctat_mutations \
-                   --plot \
-                   --out_dir varcalling.outdir \
-                   --threads 8 \
-                   --variant_filtering_mode GATK \
-                   --left "$left" \
-                   --right "$right" \
-                   --genome_lib_dir "${genome_resource_lib.fields.path}" \
-                   --variant_call_mode GATK \
-                   --tissue_type "$tissue_type" \
-                   --email "$cravat_email"
-      ]]>
-    </command>
-    <inputs>
-      <param format="fastq" name="left" type="data" label="Left/Forward strand reads" help="Left read"/>
-      <param format="fastq" name="right" type="data" label="Right/Reverse strand reads" help="Right read"/>
-      <param name="tissue_type" type="select" label="Select a pathology" help="If you don't know, just choose 'General Purpose'">
-        <options from_data_table="ctat_cravat_tissues">
-  <filter type="sort_by" column="1"/>
-  <validator type="no_options" message="No indexes are available for the selected input dataset"/>
-        </options>
-      </param>
-      <param name="genome_resource_lib" type="select" label="Select a reference genome">
-        <options from_data_table="ctat_genome_resource_libs">
-          <filter type="sort_by" column="2" />
-          <validator type="no_options" message="No indexes are available" />
-        </options>
-      </param>
-      <param name="cravat_email" type="text" label="CRAVAT email" help="CRAVAT registered email id"/>      
-      <section name="adv" title="This service uses the GATK4. GATK4 is licensed by the Broad Institute and is made available to academic users of this service for non-commercial use only. The full text of the license is available here: https://www.broadinstitute.org/gatk/about/license.html. For more information about GATK and full documentation, please visit the GATK website: https://www.broadinstitute.org." expanded="False">
-      </section>
-    </inputs>
-    <outputs>
-      <data format="tabular" name="cancertab" label="${tool.name} on ${on_string}: Cancer Tab" from_work_dir="varcalling.outdir/cancer.tab"/> 
-      <data format="vcf" name="cancerVariants" label="${tool.name} on ${on_string}: Cancer VCF" from_work_dir="varcalling.outdir/cancer.vcf"/> 
-      <data format="vcf" name="allVariants" label="${tool.name} on ${on_string}: All Variants VCF" from_work_dir="varcalling.outdir/variants.vcf"/> 
-      <data format="bam" name="bamfile" label="Bam used in haplotype calling" from_work_dir="varcalling.outdir/misc/recalibrated.bam"/>
-      <data format="txt" name="cravat" label="Annotated (lightly filtered) VCF file" from_work_dir="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz"/>
-    </outputs>
-    <tests>
-        <test>
-           <param name="left" value="reads_1.fastq.gz"/>
-           <param name="right" value="reads_2.fastq.gz"/>
-           <param name="tissue_type" value="Other"/>
-           <output name="cancertab" file="varcalling.outdir/cancer.tab" />
-           <output name="cancerVariants" file="varcalling.outdir/cancer.vcf" />
-           <output name="allVariants" file="varcalling.outdir/variants.vcf" />
-           <output name="bamfile" file="varcalling.outdir/misc/recalibrated.bam" />
-           <output name="cravat" file="varcalling.outdir/variants_initial_filtering_clean_snp_RNAedit.vcf_snpeff_updated.vcf.gz"/>
-        </test>
-    </tests>
-    <help>
-
-.. class:: warningmark
-
-Mutation detection in RNA-Seq highlights the GATK Best Practices in RNA-Seq variant calling, several sources of variant annotation, and filtering based on CRAVAT.
-
-    </help>
-    <citations>
-    </citations>
-</tool>
b
diff -r be2f3ce8562b -r c15d9049ab81 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample Wed Nov 14 12:15:58 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,18 +0,0 @@
-<tables>
-    <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, name, path</columns>
-        <file path="tool-data/ctat_genome_resource_libs.loc" />
-    </table>
-    <table name="cravat_tissues" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, name, path</columns>
-        <file path="tool-data/cravat_tissues.loc" />
-    </table>
-    <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, name, path</columns>
-        <file path="tool-data/ctat_centrifuge_indexes.loc" />
-    </table>
-    <table name="ctat_lncrna_annotations" comment_char="#" allow_duplicate_entries="False">
-        <columns>value, name, path</columns>
-        <file path="tool-data/ctat_lncrna_annotations.loc" />
-    </table>
-</tables>