Repository 'cravatool'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/cravatool

Changeset 1:2c7bcc1219fc (2018-08-16)
Previous changeset 0:83181dabeb90 (2018-05-18) Next changeset 2:f3027b8f28bd (2018-08-16)
Commit message:
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
added:
._cravatp_submit.py
._cravatp_submit.xml
cravatp_submit.py
cravatp_submit.xml
test-data/._variant.tsv
test-data/Freebayes.vcf
test-data/Freebayes_one-variant.vcf
test-data/Freebayes_special-cases.vcf
test-data/Freebayes_two-variants.vcf
test-data/MCF7_proBed.bed
test-data/error.tsv
test-data/gene.tsv
test-data/noncoding.tsv
test-data/results/intersected_vcf.vcf
test-data/variant.tsv
removed:
cravat_submit.py
cravat_submit.xml
test-data/[PepPointer].bed
test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf
test-results/Additional_Details.tsv
test-results/Gene_Level_Analysis.tsv
test-results/Input_Errors.Result.tsv
test-results/Variant_Non-coding.Result.tsv
test-results/Variant_Result.tsv
test-results/combined_variants.tsv
b
diff -r 83181dabeb90 -r 2c7bcc1219fc ._cravatp_submit.py
b
Binary file ._cravatp_submit.py has changed
b
diff -r 83181dabeb90 -r 2c7bcc1219fc ._cravatp_submit.xml
b
Binary file ._cravatp_submit.xml has changed
b
diff -r 83181dabeb90 -r 2c7bcc1219fc cravat_submit.py
--- a/cravat_submit.py Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,287 +0,0 @@\n-import requests\n-import json\n-import time\n-import urllib\n-import sys\n-import csv\n-import re\n-import math\n-from difflib import SequenceMatcher\n-from xml.etree import ElementTree as ET\n-import sqlite3\n-\n-try:\n-    input_filename = sys.argv[1]\n-    input_select_bar = sys.argv[2]\n-    GRCh_build = sys.argv[3]\n-    probed_filename = sys.argv[4]\n-    output_filename = sys.argv[5]\n-    file_3 = sys.argv[6]\n-    file_4 = sys.argv[7]\n-    file_5 = sys.argv[8]\n-except:\n-    # Filenames for testing.\n-    input_filename = \'test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf\'\n-    probed_filename = \'test-data/[PepPointer].bed\'\n-    input_select_bar = \'VEST\'\n-    GRCh_build = \'GRCh38\'\n-    output_filename = \'combined_variants.tsv\'\n-    file_3 = \'test-results/Gene_Level_Analysis.tsv\'\n-    file_4 = \'test-results/Variant_Non-coding.Result.tsv\'\n-    file_5 = \'test-results/Input_Errors.Result.tsv\'\n-    matches_filename = \'matches.tsv\'\n-\n-def getSequence(transcript_id):\n-    server = \'http://rest.ensembl.org\'\n-    ext = \'/sequence/id/\' + transcript_id + \'?content-type=text/x-seqxml%2Bxml;multiple_sequences=1;type=protein\'\n-    req = requests.get(server+ext, headers={ "Content-Type" : "text/plain"})\n-    \n-    if not req.ok:\n-        return None\n-    \n-    root = ET.fromstring(req.content)\n-    for child in root.iter(\'AAseq\'):\n-        return child.text\n-\n-\n-write_header = True\n-\n-GRCh37hg19 = \'off\'\n-if GRCh_build == \'GRCh37\':\n-    GRCh37hg19 = \'on\'\n-\n-#plugs in params to given URL\n-submit = requests.post(\'http://staging.cravat.us/CRAVAT/rest/service/submit\', files={\'inputfile\':open(input_filename)}, data={\'email\':\'znylund@insilico.us.com\', \'analyses\': input_select_bar, \'hg19\': GRCh37hg19})\n-\n-#Makes the data a json dictionary, takes out only the job ID\n-jobid = json.loads(submit.text)[\'jobid\']\n-\n-#out_file.write(jobid)    \n-submitted = json.loads(submit.text)[\'status\']\n-#out_file.write(\'\\t\' + submitted)\n-\n-input_file = open(input_filename)\n-\n-# Loads the proBED file as a list. \n-if (probed_filename != \'None\'):\n-    proBED = []\n-    with open(probed_filename) as tsvin:\n-        tsvreader = csv.reader(tsvin, delimiter=\'\\t\')\n-        for i, row in enumerate(tsvreader):\n-            proBED.append(row)\n-  \n-#loops until we find a status equal to Success, then breaks\n-while True:\n-    check = requests.get(\'http://staging.cravat.us/CRAVAT/rest/service/status\', params={\'jobid\': jobid})\n-    status = json.loads(check.text)[\'status\']\n-    resultfileurl = json.loads(check.text)[\'resultfileurl\']\n-    #out_file.write(str(status) + \', \')\n-    if status == \'Success\':\n-        #out_file.write(\'\\t\' + resultfileurl)\n-        break\n-    else:\n-        time.sleep(2)\n-        \n-#out_file.write(\'\\n\')\n-\n-#creates three files\n-file_1 = \'Variant_Result.tsv\'\n-file_2 = \'Additional_Details.tsv\'\n-#file_3 = time.strftime("%H:%M") + \'Combined_Variant_Results.tsv\'\n-\n-#Downloads the tabular results\n-urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant.Result.tsv", file_1)\n-urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Additional_Details.Result.tsv", file_2)\n-urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Gene_Level_Analysis.Result.tsv", file_3)\n-urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Non-coding.Result.tsv", file_4)\n-urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Input_Errors.Result.tsv", file_5)\n-\n-#opens the Variant Result file and the Variant Additional Details file as csv readers, then opens the output file (galaxy) as a writer\n-with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, \'wb\') as tsvout:\n-    tsvreader_2 = csv.reader(tsvin_2, delimiter=\'\\t\')        \n-    tsvout = csv.writer(tsvout, delimiter=\'\\t\')\n-\n-    headers = []\n-    duplicate_indices = []\n-    n = 12 #Index for proteogenomic column start\n-    reg_seq_change = re.compile(\'([A-Z]+)'..b"                                if transcript:\n-                                    mat = SOtranscripts.search(transcript)\n-                                    ref_fullseq = getSequence(mat.group(1))\n-                                    if ref_fullseq:\n-                                        aa_changes = {mat.group(2): [aa_changes.values()[0][0]]}\n-                                        break\n-                        # Resubmits the previous transcripts without extensions if all S.O. transcripts fail to provide a sequence\n-                        if not ref_fullseq:\n-                            transcripts = cells[headers.index('S.O. all transcripts')]\n-                            for transcript in transcripts.split(','):\n-                                if transcript:\n-                                    mat = SOtranscripts.search(transcript)\n-                                    ref_fullseq = getSequence(mat.group(1).split('.')[0])\n-                                    if ref_fullseq:\n-                                        aa_changes = {mat.group(2): [aa_changes.values()[0][0]]}\n-                                        break\n-                        if ref_fullseq:\n-                            # Sorts the amino acid changes\n-                            positions = {}\n-                            for aa_change in aa_changes:\n-                                m = reg_seq_change.search(aa_change)\n-                                aa_protpos = int(m.group(2))\n-                                aa_peppos = aa_changes[aa_change][0]\n-                                aa_startpos = aa_protpos - aa_peppos - 1\n-                                if aa_startpos in positions:\n-                                    positions[aa_startpos].append(aa_change)\n-                                else:\n-                                    positions[aa_startpos] = [aa_change]\n-                            # Goes through the sorted categories to mutate the Ensembl peptide (uses proBED peptide as a reference)\n-                            for pep_protpos in positions:\n-                                ref_seq = ref_fullseq[pep_protpos:pep_protpos+len(pepseq)]\n-                                muts = positions[pep_protpos]\n-                                options = []\n-                                mut_seq = ref_seq\n-                                for mut in muts:\n-                                    m = reg_seq_change.search(mut)\n-                                    ref_aa = m.group(1)\n-                                    mut_pos = int(m.group(2))\n-                                    alt_aa = m.group(3)\n-                                    pep_mutpos = mut_pos - pep_protpos - 1\n-                                    if ref_seq[pep_mutpos] == ref_aa and (pepseq[pep_mutpos] == alt_aa or pepseq[pep_mutpos] == ref_aa):\n-                                        if pepseq[pep_mutpos] == ref_aa:\n-                                            mut_seq = mut_seq[:pep_mutpos] + ref_aa + mut_seq[pep_mutpos+1:]\n-                                        else:\n-                                            mut_seq = mut_seq[:pep_mutpos] + alt_aa + mut_seq[pep_mutpos+1:]\n-                                    else:\n-                                        break\n-                                # Adds the mutated peptide and reference peptide if mutated correctly\n-                                if pepseq == mut_seq:\n-                                    cells[n+1] = pepseq\n-                                    cells[n] = ref_seq\n-                #print  cells\n-                tsvout.writerow(cells)\n-\n-\n-\n-            \n-    \n-\n-#a = 'col1\\tcol2\\tcol3'\n-#header_list = a.split('\\t')\n-\n-#loop through the two results, when you first hit header you print out the headers in tabular form\n-#Print out each header only once\n-#Combine both headers into one output file\n-#loop through the rest of the data and assign each value to its assigned header\n-#combine this all into one output file\n-\n-\n-\n-\n-\n"
b
diff -r 83181dabeb90 -r 2c7bcc1219fc cravat_submit.xml
--- a/cravat_submit.xml Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,44 +0,0 @@
-<tool id="cravat_submit" name="CRAVAT Submit, Check, and Retrieve" version="0.1.0">
-    <description>Submits, checks for, and retrieves data for cancer annotation</description>
-  <command interpreter="python">cravat_submit.py $input $dropdown $GRCh $psm $Variant $Gene $Noncoding $Error</command>
-  
-  
-  <inputs>
-  
-    <param format="tabular" name="input" type="data" label="Source file"> </param>
-    <param format="tabular" name="dropdown" type="select" label="Analysis Program">
-      <option value="">None</option>
-      <option value="VEST">VEST</option>
-      <option value="CHASM">CHASM</option>
-      <option value="VEST;CHASM">VEST and CHASM</option>
-    </param>
-    <param format="tabular" name="GRCh" type="select" label="Genome Reference Consortium Human Build (GRCh)">
-      <option value="GRCh38">GRCh38/hg38</option>
-      <option value="GRCh37">GRCh37/hg19</option>
-    </param>
-    <param format="tabular" name="psm" type="data" optional="true" label="ProBED File(Optional)"> </param>
-    
-    
-  </inputs>
-  
-  <outputs>
-      <collection name="collection" type="list" label="CRAVAT Results: ${on_string} using ${dropdown}">
-        <data format="cravat" label="CRAVAT: Gene Level Annotation Report" name="Gene" />
-        <data format="cravat" label="CRAVAT: Variant Report" name="Variant" />
-        <data format="cravat" label="CRAVAT: Non-coding Variant Report" name="Noncoding" />
-        <data format="cravat" label="CRAVAT: Errors" name="Error" />
-      </collection>
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="fa_gc_content_input.fa"/>
-      <output name="out_file1" file="fa_gc_content_output.txt"/>
-    </test>
-  </tests>
-
-  <help>
- This tool submits, checks for, and retrieves data for cancer annotation from the CRAVAT platform at cravat.us.
-  </help>
-
-</tool>
b
diff -r 83181dabeb90 -r 2c7bcc1219fc cravatp_submit.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cravatp_submit.py Thu Aug 16 12:27:35 2018 -0400
[
b'@@ -0,0 +1,390 @@\n+# -*- coding: utf-8 -*-\n+#\n+# Author: Ray W. Sajulga\n+# \n+#\n+\n+import requests # pipenv requests\n+import json\n+import time\n+import urllib\n+import sys\n+import csv\n+import re\n+import math\n+import argparse\n+from xml.etree import ElementTree as ET\n+from zipfile import ZipFile\n+try: #Python 3\n+    from urllib.request import urlopen\n+except ImportError: #Python 2\n+    from urllib2 import urlopen\n+from io import BytesIO\n+\n+# initializes blank parameters\n+chasm_classifier = \'\'\n+probed_filename = None\n+intersected_only = False\n+vcf_output = None\n+analysis_type = None\n+\n+# # Testing Command\n+# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38\n+# test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv\n+# test-data/error.tsv CHASM -\xe2\x80\x94classifier Breast -\xe2\x80\x94proBED\n+# test-data/MCF7_proBed.bed\n+parser = argparse.ArgumentParser()\n+parser.add_argument(\'cravatInput\',help=\'The filename of the input \'\n+                                       \'CRAVAT-formatted tabular file \'\n+                                       \'(e.g., VCF)\')\n+parser.add_argument(\'GRCh\', help=\'The name of the human reference \'\n+                                 \'genome used for annotation: \'\n+                                 \'GRCh38/hg38 or GRCh37/hg19\')\n+parser.add_argument(\'variant\', help=\'The filename of the output \'\n+                                     \'variant file\')\n+parser.add_argument(\'gene\', help=\'The filename of the output gene \'\n+                                 \'variant report\')\n+parser.add_argument(\'noncoding\', help=\'The filename of the output \'\n+                                       \'non-coding variant report\')\n+parser.add_argument(\'error\', help=\'The filename of the output error \'\n+                                  \'file\')\n+parser.add_argument(\'analysis\', help=\'The machine-learning algorithm \'\n+                                     \'used for CRAVAT annotation (VEST\'\n+                                     \' and/or CHASM)\')\n+parser.add_argument(\'--classifier\', help=\'The cancer classifier for the\'\n+                                         \' CHASM algorithm\')\n+parser.add_argument(\'--proBED\', help=\'The filename of the proBED file \'\n+                                     \'containing peptides with genomic \'\n+                                     \'coordinates\')\n+parser.add_argument(\'--intersectOnly\', help=\'Specifies whether to \'\n+                                            \'analyze only variants \'\n+                                            \'intersected between the \'\n+                                            \'CRAVAT input and proBED \'\n+                                            \'file\')\n+parser.add_argument(\'--vcfOutput\', help=\'The output filename of the \'\n+                                        \'intersected VCF file\')\n+\n+# assigns parsed arguments to appropriate variables\n+args = parser.parse_args()\n+input_filename = args.cravatInput\n+GRCh_build = args.GRCh\n+output_filename = args.variant\n+file_3 = args.gene\n+file_4 = args.noncoding\n+file_5 = args.error\n+if args.analysis != \'None\':\n+    analysis_type = args.analysis\n+if args.classifier:\n+    chasm_classifier = args.classifier\n+if args.proBED:\n+    probed_filename = args.proBED\n+if args.intersectOnly:\n+    intersected_only = args.intersectOnly    \n+if args.vcfOutput:\n+    vcf_output = args.vcfOutput\n+\n+if analysis_type and \'+\' in analysis_type:\n+    analysis_type = \'CHASM;VEST\'\n+\n+# obtains the transcript\'s protein sequence using Ensembl API\n+def getSequence(transcript_id):\n+    server = \'http://rest.ensembl.org\'\n+    ext = (\'/sequence/id/\' + transcript_id \n+           + \'?content-type=text/x-seqxml%2Bxml;\'\n+             \'multiple_sequences=1;type=protein\')\n+    req = requests.get(server+ext,\n+                       headers={ "Content-Type" : "text/plain"})\n+    \n+    if not req.ok:\n+        return None\n+    \n+    root = ET.fromstring(req.content)\n+    for child in root.iter(\'AAseq\'):\n+        return child.text\n+\n+# parses the proBED file as a list. \n+def loadProBED():\n+  '..b"ranscripts = cells[headers.index('S.O. all transcripts')]\n+                            for transcript in transcripts.split(','):\n+                                if transcript:\n+                                    mat = SOtranscripts.search(transcript)\n+                                    ref_fullseq = getSequence(mat.group(1))\n+                                    if ref_fullseq:\n+                                        aa_changes = {mat.group(2): [aa_changes.values()[0][0]]}\n+                                        break\n+                        # Resubmits the previous transcripts without\n+                        # extensions if all S.O. transcripts fail to\n+                        # provide a sequence\n+                        if not ref_fullseq:\n+                            transcripts = cells[headers.index('S.O. all transcripts')]\n+                            for transcript in transcripts.split(','):\n+                                if transcript:\n+                                    mat = SOtranscripts.search(transcript)\n+                                    ref_fullseq = getSequence(mat.group(1).split('.')[0])\n+                                    if ref_fullseq:\n+                                        aa_changes = {mat.group(2): [aa_changes.values()[0][0]]}\n+                                        break\n+                        if ref_fullseq:\n+                            # Sorts the amino acid changes\n+                            positions = {}\n+                            for aa_change in aa_changes:\n+                                m = reg_seq_change.search(aa_change)\n+                                aa_protpos = int(m.group(2))\n+                                aa_peppos = aa_changes[aa_change][0]\n+                                aa_startpos = aa_protpos - aa_peppos - 1\n+                                if aa_startpos in positions:\n+                                    positions[aa_startpos].append(aa_change)\n+                                else:\n+                                    positions[aa_startpos] = [aa_change]\n+                            # Goes through the sorted categories to mutate the Ensembl peptide\n+                            # (uses proBED peptide as a reference)\n+                            for pep_protpos in positions:\n+                                ref_seq = ref_fullseq[pep_protpos:pep_protpos+len(pepseq)]\n+                                muts = positions[pep_protpos]\n+                                options = []\n+                                mut_seq = ref_seq\n+                                for mut in muts:\n+                                    m = reg_seq_change.search(mut)\n+                                    ref_aa = m.group(1)\n+                                    mut_pos = int(m.group(2))\n+                                    alt_aa = m.group(3)\n+                                    pep_mutpos = mut_pos - pep_protpos - 1\n+                                    if (ref_seq[pep_mutpos] == ref_aa\n+                                            and (pepseq[pep_mutpos] == alt_aa\n+                                            or pepseq[pep_mutpos] == ref_aa)):\n+                                        if pepseq[pep_mutpos] == ref_aa:\n+                                            mut_seq = (mut_seq[:pep_mutpos] + ref_aa\n+                                                       + mut_seq[pep_mutpos+1:])\n+                                        else:\n+                                            mut_seq = (mut_seq[:pep_mutpos] + alt_aa\n+                                                       + mut_seq[pep_mutpos+1:])\n+                                    else:\n+                                        break\n+                                # Adds the mutated peptide and reference peptide if mutated correctly\n+                                if pepseq == mut_seq:\n+                                    cells[n+1] = pepseq\n+                                    cells[n] = ref_seq\n+                tsvout.writerow(cells)\n+\n"
b
diff -r 83181dabeb90 -r 2c7bcc1219fc cravatp_submit.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cravatp_submit.xml Thu Aug 16 12:27:35 2018 -0400
[
b'@@ -0,0 +1,296 @@\n+<tool id="cravatp_submit" name="CRAVAT-P Submit, Intersect, Check, and Retrieve" version="1.0.0">\n+    <description>| Submits, intersects, checks for, and retrieves data for cancer annotation.</description>\n+  <command detect_errors="aggressive">\n+  <![CDATA[\n+#if $proteo.proteoInput == \'yes\':\n+    #if \'$analysis.type\' == \'CHASM\' or \'$analysis.type\' == \'CHASM+VEST\':\n+        python \'$__tool_directory__/cravatp_submit.py\' \'$input\' \'$GRCh\' \'$variant\' \'$gene\' \'$noncoding\' \'$error\' \'$analysis.type\' --classifier \'$analysis.classifier\' --proBED \'$proBED\' --intersectOnly \'$proteo.intersectedVariants\' --vcfOutput \'$vcf_output\'\n+    #else: \n+        python \'$__tool_directory__/cravatp_submit.py\' \'$input\' \'$GRCh\' \'$variant\' \'$gene\' \'$noncoding\' \'$error\' \'$analysis.type\' --proBED \'$proBED\' --intersectOnly \'$proteo.intersectedVariants\' --vcfOutput \'$vcf_output\' \n+    #end if\n+#else:\n+    #if $analysis.type == \'CHASM\' or $analysis.type == \'CHASM+VEST\':\n+        python \'$__tool_directory__/cravatp_submit.py\' \'$input\' \'$GRCh\' \'$variant\' \'$gene\' \'$noncoding\' \'$error\' \'$analysis.type\' --classifier \'$analysis.classifier\' \n+    #else: \n+        python \'$__tool_directory__/cravatp_submit.py\' \'$input\' \'$GRCh\' \'$variant\' \'$gene\' \'$noncoding\' \'$error\' \'$analysis.type\'\n+    #end if\n+#end if\n+]]></command>\n+  \n+  <inputs>\n+    <param format="vcf" name="input" type="data" label="Source file" help="Accepts transcriptomic or genomic inputs (e.g., tabular, VCF). Additional details can be found below."></param>\n+    <conditional name="proteo">\n+      <param name="proteoInput" type="select" label="Intersect with proteogenomic input?" help="Source file (first input) must be in genomic input to enable intersection with this proteogenomic file.">\n+        <option value="yes">Yes</option>\n+        <option value="no" selected="true">No</option>\n+      </param>\n+      <when value="yes">\n+        <param format="BED" name="proBED" type="data" label="Peptides with Genomic Coordinates (ProBED Format)"></param>\n+        <param name="intersectedVariants" type="boolean" checked="false" label="Submit only intersected variants?" help="Submits the intersected portion of the genomic file to CRAVAT\'s server. Restricting analysis to only intersected variants takes less time but also provides less-comprehensive results."></param>\n+        <param name="output_vcf" type="boolean" checked="false" label="Output intersected genomic file?" help="The intersected genomic file (e.g., VCF) will be included as a result."></param>\n+      </when>\n+      <when value="no">\n+      </when>\n+    </conditional>\n+    <conditional name="analysis">\n+      <param format="tabular" name="type" type="select" label="Analysis Program" help="VEST and CHASM are machine learning methods for predicting the pathogenicity and functional significance of variants, respectively.">\n+          <option value="None">None</option>\n+          <option value="CHASM">CHASM</option>\n+          <option value="VEST">VEST</option>\n+          <option value="CHASM+VEST">CHASM and VEST</option>\n+      </param>\n+      <when value="None"/>\n+      <when value="VEST"/>\n+      <when value="CHASM">\n+        <param format="tabular" name="classifier" type="select" label="CHASM Classifier">\n+          <option value="Bladder">Bladder</option>\n+          <option value="Blood-Lymphocyte">Blood-Lymphocyte</option>\n+          <option value="Blood-Myeloid">Blood-Myeloid</option>\n+          <option value="Brain-Glioblastoma-Multiforme">Brain-Glioblastoma-Multiforme</option>\n+          <option value="Brain-Lower-Grade-Glioma">Brain-Lower-Grade-Glioma</option>\n+          <option value="Breast">Breast</option>\n+          <option value="Cervix">Cervix</option>\n+          <option value="Colon">Colon</option>\n+          <option value="GID">GID</option>\n+          <option value="Head-and-Neck">Head-and-Neck</option>\n+          <option value="Kidney-Chromophobe">Kidney-Chromophobe</option>\n+          <option value="Kidney-Clear-Cell'..b'nt.vcf"/>\n+      <param name="GRCh" value="GRCh38"/>\n+      <param name="variant" value="variant.tsv"/>\n+      <param name="gene" value="gene.tsv"/>\n+      <param name="noncoding" value="noncoding.tsv"/>\n+      <param name="error" value="error.tsv"/>\n+      <param name="type" value="CHASM" />\n+      <param name="classifier" value="Breast" />\n+      <param name="proteoInput" value="yes" />\n+      <param name="proBED" value="MCF7_proBed.bed"/>\n+      <output_collection name="results" type="list">\n+        <element name="variant">\n+          <assert_contents>\n+            <has_text text="#Variant Report" />\n+            <has_text text="hg38"/>\n+            <has_text text="UPF1" />\n+            <has_text text="EAIDSPVSFLVLHNQIR" />\n+          </assert_contents>\n+        </element>\n+      </output_collection>\n+    </test>\n+    <!-- "Output intersected VCF" test case -->\n+    <test>\n+      <param name="input" value="Freebayes_one-variant.vcf"/>\n+      <param name="GRCh" value="GRCh38"/>\n+      <param name="variant" value="variant.tsv"/>\n+      <param name="gene" value="gene.tsv"/>\n+      <param name="noncoding" value="noncoding.tsv"/>\n+      <param name="error" value="error.tsv"/>\n+      <param name="type" value="CHASM" />\n+      <param name="classifier" value="Breast" />\n+      <param name="proteoInput" value="yes" />\n+      <param name="proBED" value="MCF7_proBed.bed"/>\n+      <param name="output_vcf" value="true"/>\n+      <output name="vcf_output" file="results/intersected_vcf.vcf"/>\n+    </test>\n+    <!-- "Only intersected proteogenomic variants submitted" test case-->\n+    <test>\n+      <param name="input" value="Freebayes_two-variants.vcf"/>\n+      <param name="GRCh" value="GRCh38"/>\n+      <param name="variant" value="variant.tsv"/>\n+      <param name="gene" value="gene.tsv"/>\n+      <param name="noncoding" value="noncoding.tsv"/>\n+      <param name="error" value="error.tsv"/>\n+      <param name="type" value="CHASM" />\n+      <param name="classifier" value="Breast" />\n+      <param name="proteoInput" value="yes" />\n+      <param name="proBED" value="MCF7_proBed.bed"/>\n+      <param name="intersectedVariants" value="true" />\n+      <output_collection name="results" type="list">\n+        <element name="variant">\n+          <assert_contents>\n+            <has_text text="hg38"/>\n+            <has_text text="UPF1" />\n+            <not_has_text text="CRABP2"/>\n+          </assert_contents>\n+        </element>\n+      </output_collection>\n+    </test>\n+  </tests>\n+  <help><![CDATA[\n+    This tool submits, checks for, and retrieves data for cancer annotation from the CRAVAT platform at https://www.cravat.us. \n+    For additional details on input format, visit this link: http://cravat.us/CRAVAT/help.jsp.\n+\n+-----\n+\n+**Input Type Example:**\n+\n+  CRAVAT Format (*Genomic-coordinate format*)\n+\n+    =====  =====  =========  ======  =========  =========  ====================\n+    UID    Chr.   Position   Strand  Ref. base  Alt. base  Sample ID (optional)\n+    =====  =====  =========  ======  =========  =========  ====================\n+    TR1    chr17  7674188    \\-      G          T          TCGA-02-0231\n+    TR2    chr10  121520166  \\-      G          A          TCGA-02-3512\n+    TR3    chr13  48459831   \\+      C          A          TCGA-02-3532\n+    TR4    chr7   116777451  \\+      G          T          TCGA-02-1523\n+    TR5    chr7   140753336  \\-      T          A          TCGA-02-0023\n+    TR6    chr17  39724745   \\+      G          T          TCGA-02-0252\n+    Ins1   chr17  39724745   \\+      \\-         T          TCGA-02-0252\n+    Del1   chr17  39724745   \\+      A          \\-         TCGA-02-0252\n+    CSub1  chr2   39644095   \\+      ATGCT      GA         TCGA-02-0252\n+    =====  =====  =========  ======  =========  =========  ====================\n+]]>\n+  </help>\n+  <citations>\n+    <citation type="doi">10.1158/0008-5472.CAN-17-0338</citation>\n+    <citation type="doi">10.1186/s13059-017-1377-x</citation>\n+  </citations>\n+</tool>\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/._variant.tsv
b
Binary file test-data/._variant.tsv has changed
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/Freebayes.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Freebayes.vcf Thu Aug 16 12:27:35 2018 -0400
b
b'@@ -0,0 +1,548 @@\n+##fileformat=VCFv4.2\n+##fileDate=20180518\n+##source=freeBayes v1.1.0-46-g8d2b3a0-dirty\n+##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\n+##contig=<ID=chr1,length=248956422>\n+##contig=<ID=chr10,length=133797422>\n+##contig=<ID=chr11,length=135086622>\n+##contig=<ID=chr11_KI270721v1_random,length=100316>\n+##contig=<ID=chr12,length=133275309>\n+##contig=<ID=chr13,length=114364328>\n+##contig=<ID=chr14,length=107043718>\n+##contig=<ID=chr14_GL000009v2_random,length=201709>\n+##contig=<ID=chr14_GL000225v1_random,length=211173>\n+##contig=<ID=chr14_KI270722v1_random,length=194050>\n+##contig=<ID=chr14_GL000194v1_random,length=191469>\n+##contig=<ID=chr14_KI270723v1_random,length=38115>\n+##contig=<ID=chr14_KI270724v1_random,length=39555>\n+##contig=<ID=chr14_KI270725v1_random,length=172810>\n+##contig=<ID=chr14_KI270726v1_random,length=43739>\n+##contig=<ID=chr15,length=101991189>\n+##contig=<ID=chr15_KI270727v1_random,length=448248>\n+##contig=<ID=chr16,length=90338345>\n+##contig=<ID=chr16_KI270728v1_random,length=1872759>\n+##contig=<ID=chr17,length=83257441>\n+##contig=<ID=chr17_GL000205v2_random,length=185591>\n+##contig=<ID=chr17_KI270729v1_random,length=280839>\n+##contig=<ID=chr17_KI270730v1_random,length=112551>\n+##contig=<ID=chr18,length=80373285>\n+##contig=<ID=chr19,length=58617616>\n+##contig=<ID=chr1_KI270706v1_random,length=175055>\n+##contig=<ID=chr1_KI270707v1_random,length=32032>\n+##contig=<ID=chr1_KI270708v1_random,length=127682>\n+##contig=<ID=chr1_KI270709v1_random,length=66860>\n+##contig=<ID=chr1_KI270710v1_random,length=40176>\n+##contig=<ID=chr1_KI270711v1_random,length=42210>\n+##contig=<ID=chr1_KI270712v1_random,length=176043>\n+##contig=<ID=chr1_KI270713v1_random,length=40745>\n+##contig=<ID=chr1_KI270714v1_random,length=41717>\n+##contig=<ID=chr2,length=242193529>\n+##contig=<ID=chr20,length=64444167>\n+##contig=<ID=chr21,length=46709983>\n+##contig=<ID=chr22,length=50818468>\n+##contig=<ID=chr22_KI270731v1_random,length=150754>\n+##contig=<ID=chr22_KI270732v1_random,length=41543>\n+##contig=<ID=chr22_KI270733v1_random,length=179772>\n+##contig=<ID=chr22_KI270734v1_random,length=165050>\n+##contig=<ID=chr22_KI270735v1_random,length=42811>\n+##contig=<ID=chr22_KI270736v1_random,length=181920>\n+##contig=<ID=chr22_KI270737v1_random,length=103838>\n+##contig=<ID=chr22_KI270738v1_random,length=99375>\n+##contig=<ID=chr22_KI270739v1_random,length=73985>\n+##contig=<ID=chr2_KI270715v1_random,length=161471>\n+##contig=<ID=chr2_KI270716v1_random,length=153799>\n+##contig=<ID=chr3,length=198295559>\n+##contig=<ID=chr3_GL000221v1_random,length=155397>\n+##contig=<ID=chr4,length=190214555>\n+##contig=<ID=chr4_GL000008v2_random,length=209709>\n+##contig=<ID=chr5,length=181538259>\n+##contig=<ID=chr5_GL000208v1_random,length=92689>\n+##contig=<ID=chr6,length=170805979>\n+##contig=<ID=chr7,length=159345973>\n+##contig=<ID=chr8,length=145138636>\n+##contig=<ID=chr9,length=138394717>\n+##contig=<ID=chr9_KI270717v1_random,length=40062>\n+##contig=<ID=chr9_KI270718v1_random,length=38054>\n+##contig=<ID=chr9_KI270719v1_random,length=176845>\n+##contig=<ID=chr9_KI270720v1_random,length=39050>\n+##contig=<ID=chr1_KI270762v1_alt,length=354444>\n+##contig=<ID=chr1_KI270766v1_alt,length=256271>\n+##contig=<ID=chr1_KI270760v1_alt,length=109528>\n+##contig=<ID=chr1_KI270765v1_alt,length=185285>\n+##contig=<ID=chr1_GL383518v1_alt,length=182439>\n+##contig=<ID=chr1_GL383519v1_alt,length=110268>\n+##contig=<ID=chr1_GL383520v2_alt,length=366580>\n+##contig=<ID=chr1_KI270764v1_alt,length=50258>\n+##contig=<ID=chr1_KI270763v1_alt,length=911658>\n+##contig=<ID=chr1_KI270759v1_alt,length=425601>\n+##contig=<ID=chr1_KI270761v1_alt,length=165834>\n+##contig=<ID=chr2_KI270770v1_alt,length=136240>\n+##contig=<ID=chr2_KI270773v1_alt,length=70887>\n+##contig=<ID=chr2_KI270774v1_alt,length=223625>\n+##contig=<ID=chr2_KI270769v1_alt,length=120616>\n+##contig=<ID=chr2_GL383521v1_alt,length=143390>\n+##contig=<ID=chr2_KI270772v1_alt,length=133041>\n+##contig=<ID=chr2_KI270775v1_alt,length'..b'13.8677;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=1;SRP=6.91895;SRR=4;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:7:5,2:5:169:2:28:-0.55277,0,-10.4001\n+chr19\t17205444\t.\tT\tC\t206.198\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=7;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=5.80219;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=14.3092;PAIRED=0.857143;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=249;QR=0;RO=0;RPL=2;RPP=5.80219;RPPR=0;RPR=5;RUN=1;SAF=4;SAP=3.32051;SAR=3;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:7:0,7:0:0:7:249:-22.753,-2.10721,0\n+chr19\t17205973\t.\tT\tC\t12243.8\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=406;CIGAR=1X;DP=406;DPB=406;DPRA=0;EPP=14.3276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=58.4015;MQMR=0;NS=1;NUMALT=1;ODDS=567.441;PAIRED=0.985222;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=14833;QR=0;RO=0;RPL=368;RPP=585.457;RPPR=0;RPR=38;RUN=1;SAF=182;SAP=12.445;SAR=224;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:406:0,406:0:0:406:14833:-1297.85,-122.218,0\n+chr19\t18856059\t.\tC\tT\t10269.5\t.\tAB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=172.262;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1043.89;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92\n+chr19\t18867128\t.\tG\tC\t398.913\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=14;CIGAR=1X;DP=14;DPB=14;DPRA=0;EPP=33.4109;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=24.0133;PAIRED=1;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=523;QR=0;RO=0;RPL=11;RPP=12.937;RPPR=0;RPR=3;RUN=1;SAF=11;SAP=12.937;SAR=3;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:14:0,14:0:0:14:523:-47.4124,-4.21442,0\n+chr2\t231456129\t.\tGGATGGCT\tTGATTCAC\t54.047\t.\tAB=0.363636;ABP=4.78696;AC=1;AF=0.5;AN=2;AO=4;CIGAR=1X3M4X;DP=11;DPB=12.375;DPRA=0;EPP=3.0103;EPPR=5.80219;GTI=0;LEN=8;MEANALT=1;MQM=60;MQMR=51.5714;NS=1;NUMALT=1;ODDS=12.4448;PAIRED=1;PAIREDR=0.714286;PAO=0;PQA=0;PQR=134;PRO=4;QA=150;QR=252;RO=7;RPL=0;RPP=11.6962;RPPR=18.2106;RPR=4;RUN=1;SAF=2;SAP=3.0103;SAR=2;SRF=5;SRP=5.80219;SRR=2;TYPE=complex\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:11:7,4:7:252:4:150:-9.35048,0,-24.4716\n+chr2\t231456762\t.\tT\tC\t60.9983\t.\tAB=0.333333;ABP=7.35324;AC=1;AF=0.5;AN=2;AO=6;CIGAR=1X;DP=18;DPB=18;DPRA=0;EPP=3.0103;EPPR=9.52472;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=14.0454;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=181;QR=429;RO=12;RPL=0;RPP=16.0391;RPPR=29.068;RPR=6;RUN=1;SAF=3;SAP=3.0103;SAR=3;SRF=9;SRP=9.52472;SRR=3;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:18:12,6:12:429:6:181:-11.1631,0,-33.5255\n+chr2\t231456763\t.\tA\tG\t0.000241075\t.\tAB=0.25;ABP=9.52472;AC=1;AF=0.5;AN=2;AO=3;CIGAR=1X;DP=12;DPB=12;DPRA=0;EPP=9.52472;EPPR=18.2106;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=9.79893;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=39;QR=257;RO=7;RPL=0;RPP=9.52472;RPPR=18.2106;RPR=3;RUN=1;SAF=0;SAP=9.52472;SAR=3;SRF=7;SRP=18.2106;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:12:7,3:7:257:3:39:-0.481678,0,-20.2656\n+chr2\t231456764\t.\tC\tT\t67.7463\t.\tAB=0.714286;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=5;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=13.8677;EPPR=7.35324;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=3.26857;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=166;QR=76;RO=2;RPL=0;RPP=13.8677;RPPR=7.35324;RPR=5;RUN=1;SAF=5;SAP=13.8677;SAR=0;SRF=2;SRP=7.35324;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:7:2,5:2:76:5:166:-13.1558,0,-5.1076\n+chr2\t231457190\t.\tT\tC\t744.752\t.\tAB=0.221739;ABP=157.695;AC=1;AF=0.5;AN=2;AO=51;CIGAR=1X;DP=230;DPB=230;DPRA=0;EPP=3.3935;EPPR=7.38964;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=171.486;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=1858;QR=6693;RO=179;RPL=34;RPP=15.3153;RPPR=25.4408;RPR=17;RUN=1;SAF=18;SAP=12.5903;SAR=33;SRF=84;SRP=4.47817;SRR=95;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:230:179,51:179:6693:51:1858:-98.2392,0,-533.104\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/Freebayes_one-variant.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Freebayes_one-variant.vcf Thu Aug 16 12:27:35 2018 -0400
b
b'@@ -0,0 +1,516 @@\n+##fileformat=VCFv4.2\n+##fileDate=20180518\n+##source=freeBayes v1.1.0-46-g8d2b3a0-dirty\n+##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\n+##contig=<ID=chr1,length=248956422>\n+##contig=<ID=chr10,length=133797422>\n+##contig=<ID=chr11,length=135086622>\n+##contig=<ID=chr11_KI270721v1_random,length=100316>\n+##contig=<ID=chr12,length=133275309>\n+##contig=<ID=chr13,length=114364328>\n+##contig=<ID=chr14,length=107043718>\n+##contig=<ID=chr14_GL000009v2_random,length=201709>\n+##contig=<ID=chr14_GL000225v1_random,length=211173>\n+##contig=<ID=chr14_KI270722v1_random,length=194050>\n+##contig=<ID=chr14_GL000194v1_random,length=191469>\n+##contig=<ID=chr14_KI270723v1_random,length=38115>\n+##contig=<ID=chr14_KI270724v1_random,length=39555>\n+##contig=<ID=chr14_KI270725v1_random,length=172810>\n+##contig=<ID=chr14_KI270726v1_random,length=43739>\n+##contig=<ID=chr15,length=101991189>\n+##contig=<ID=chr15_KI270727v1_random,length=448248>\n+##contig=<ID=chr16,length=90338345>\n+##contig=<ID=chr16_KI270728v1_random,length=1872759>\n+##contig=<ID=chr17,length=83257441>\n+##contig=<ID=chr17_GL000205v2_random,length=185591>\n+##contig=<ID=chr17_KI270729v1_random,length=280839>\n+##contig=<ID=chr17_KI270730v1_random,length=112551>\n+##contig=<ID=chr18,length=80373285>\n+##contig=<ID=chr19,length=58617616>\n+##contig=<ID=chr1_KI270706v1_random,length=175055>\n+##contig=<ID=chr1_KI270707v1_random,length=32032>\n+##contig=<ID=chr1_KI270708v1_random,length=127682>\n+##contig=<ID=chr1_KI270709v1_random,length=66860>\n+##contig=<ID=chr1_KI270710v1_random,length=40176>\n+##contig=<ID=chr1_KI270711v1_random,length=42210>\n+##contig=<ID=chr1_KI270712v1_random,length=176043>\n+##contig=<ID=chr1_KI270713v1_random,length=40745>\n+##contig=<ID=chr1_KI270714v1_random,length=41717>\n+##contig=<ID=chr2,length=242193529>\n+##contig=<ID=chr20,length=64444167>\n+##contig=<ID=chr21,length=46709983>\n+##contig=<ID=chr22,length=50818468>\n+##contig=<ID=chr22_KI270731v1_random,length=150754>\n+##contig=<ID=chr22_KI270732v1_random,length=41543>\n+##contig=<ID=chr22_KI270733v1_random,length=179772>\n+##contig=<ID=chr22_KI270734v1_random,length=165050>\n+##contig=<ID=chr22_KI270735v1_random,length=42811>\n+##contig=<ID=chr22_KI270736v1_random,length=181920>\n+##contig=<ID=chr22_KI270737v1_random,length=103838>\n+##contig=<ID=chr22_KI270738v1_random,length=99375>\n+##contig=<ID=chr22_KI270739v1_random,length=73985>\n+##contig=<ID=chr2_KI270715v1_random,length=161471>\n+##contig=<ID=chr2_KI270716v1_random,length=153799>\n+##contig=<ID=chr3,length=198295559>\n+##contig=<ID=chr3_GL000221v1_random,length=155397>\n+##contig=<ID=chr4,length=190214555>\n+##contig=<ID=chr4_GL000008v2_random,length=209709>\n+##contig=<ID=chr5,length=181538259>\n+##contig=<ID=chr5_GL000208v1_random,length=92689>\n+##contig=<ID=chr6,length=170805979>\n+##contig=<ID=chr7,length=159345973>\n+##contig=<ID=chr8,length=145138636>\n+##contig=<ID=chr9,length=138394717>\n+##contig=<ID=chr9_KI270717v1_random,length=40062>\n+##contig=<ID=chr9_KI270718v1_random,length=38054>\n+##contig=<ID=chr9_KI270719v1_random,length=176845>\n+##contig=<ID=chr9_KI270720v1_random,length=39050>\n+##contig=<ID=chr1_KI270762v1_alt,length=354444>\n+##contig=<ID=chr1_KI270766v1_alt,length=256271>\n+##contig=<ID=chr1_KI270760v1_alt,length=109528>\n+##contig=<ID=chr1_KI270765v1_alt,length=185285>\n+##contig=<ID=chr1_GL383518v1_alt,length=182439>\n+##contig=<ID=chr1_GL383519v1_alt,length=110268>\n+##contig=<ID=chr1_GL383520v2_alt,length=366580>\n+##contig=<ID=chr1_KI270764v1_alt,length=50258>\n+##contig=<ID=chr1_KI270763v1_alt,length=911658>\n+##contig=<ID=chr1_KI270759v1_alt,length=425601>\n+##contig=<ID=chr1_KI270761v1_alt,length=165834>\n+##contig=<ID=chr2_KI270770v1_alt,length=136240>\n+##contig=<ID=chr2_KI270773v1_alt,length=70887>\n+##contig=<ID=chr2_KI270774v1_alt,length=223625>\n+##contig=<ID=chr2_KI270769v1_alt,length=120616>\n+##contig=<ID=chr2_GL383521v1_alt,length=143390>\n+##contig=<ID=chr2_KI270772v1_alt,length=133041>\n+##contig=<ID=chr2_KI270775v1_alt,length'..b'=A,Type=Float,Description="End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=EPPR,Number=1,Type=Float,Description="End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding\'s inequality">\n+##INFO=<ID=DPRA,Number=A,Type=Float,Description="Alternate allele depth ratio.  Ratio between depth in samples with each called alternate allele and those without.">\n+##INFO=<ID=ODDS,Number=1,Type=Float,Description="The log odds ratio of the best genotype combination to the second-best.">\n+##INFO=<ID=GTI,Number=1,Type=Integer,Description="Number of genotyping iterations required to reach convergence or bailout.">\n+##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description="The extended CIGAR representation of each alternate allele, with the exception that \'=\' is replaced by \'M\' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.">\n+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description="Number of unique non-reference alleles in called genotypes at this position.">\n+##INFO=<ID=MEANALT,Number=A,Type=Float,Description="Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.">\n+##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">\n+##INFO=<ID=MQM,Number=A,Type=Float,Description="Mean mapping quality of observed alternate alleles">\n+##INFO=<ID=MQMR,Number=1,Type=Float,Description="Mean mapping quality of observed reference alleles">\n+##INFO=<ID=PAIRED,Number=A,Type=Float,Description="Proportion of observed alternate alleles which are supported by properly paired read fragments">\n+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description="Proportion of observed reference alleles which are supported by properly paired read fragments">\n+##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="Last position (inclusive) in gVCF output record.">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">\n+##FORMAT=<ID=GL,Number=G,Type=Float,Description="Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy">\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">\n+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Number of observation for each allele">\n+##FORMAT=<ID=RO,Number=1,Type=Integer,Description="Reference allele observation count">\n+##FORMAT=<ID=QR,Number=1,Type=Integer,Description="Sum of quality of the reference observations">\n+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">\n+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">\n+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown\n+chr19\t18856059\t.\tC\tT\t10269.5\t.\tAB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=172.262;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1043.89;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/Freebayes_special-cases.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Freebayes_special-cases.vcf Thu Aug 16 12:27:35 2018 -0400
b
b'@@ -0,0 +1,523 @@\n+##fileformat=VCFv4.2\n+##fileDate=20180518\n+##source=freeBayes v1.1.0-46-g8d2b3a0-dirty\n+##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\n+##contig=<ID=chr1,length=248956422>\n+##contig=<ID=chr10,length=133797422>\n+##contig=<ID=chr11,length=135086622>\n+##contig=<ID=chr11_KI270721v1_random,length=100316>\n+##contig=<ID=chr12,length=133275309>\n+##contig=<ID=chr13,length=114364328>\n+##contig=<ID=chr14,length=107043718>\n+##contig=<ID=chr14_GL000009v2_random,length=201709>\n+##contig=<ID=chr14_GL000225v1_random,length=211173>\n+##contig=<ID=chr14_KI270722v1_random,length=194050>\n+##contig=<ID=chr14_GL000194v1_random,length=191469>\n+##contig=<ID=chr14_KI270723v1_random,length=38115>\n+##contig=<ID=chr14_KI270724v1_random,length=39555>\n+##contig=<ID=chr14_KI270725v1_random,length=172810>\n+##contig=<ID=chr14_KI270726v1_random,length=43739>\n+##contig=<ID=chr15,length=101991189>\n+##contig=<ID=chr15_KI270727v1_random,length=448248>\n+##contig=<ID=chr16,length=90338345>\n+##contig=<ID=chr16_KI270728v1_random,length=1872759>\n+##contig=<ID=chr17,length=83257441>\n+##contig=<ID=chr17_GL000205v2_random,length=185591>\n+##contig=<ID=chr17_KI270729v1_random,length=280839>\n+##contig=<ID=chr17_KI270730v1_random,length=112551>\n+##contig=<ID=chr18,length=80373285>\n+##contig=<ID=chr19,length=58617616>\n+##contig=<ID=chr1_KI270706v1_random,length=175055>\n+##contig=<ID=chr1_KI270707v1_random,length=32032>\n+##contig=<ID=chr1_KI270708v1_random,length=127682>\n+##contig=<ID=chr1_KI270709v1_random,length=66860>\n+##contig=<ID=chr1_KI270710v1_random,length=40176>\n+##contig=<ID=chr1_KI270711v1_random,length=42210>\n+##contig=<ID=chr1_KI270712v1_random,length=176043>\n+##contig=<ID=chr1_KI270713v1_random,length=40745>\n+##contig=<ID=chr1_KI270714v1_random,length=41717>\n+##contig=<ID=chr2,length=242193529>\n+##contig=<ID=chr20,length=64444167>\n+##contig=<ID=chr21,length=46709983>\n+##contig=<ID=chr22,length=50818468>\n+##contig=<ID=chr22_KI270731v1_random,length=150754>\n+##contig=<ID=chr22_KI270732v1_random,length=41543>\n+##contig=<ID=chr22_KI270733v1_random,length=179772>\n+##contig=<ID=chr22_KI270734v1_random,length=165050>\n+##contig=<ID=chr22_KI270735v1_random,length=42811>\n+##contig=<ID=chr22_KI270736v1_random,length=181920>\n+##contig=<ID=chr22_KI270737v1_random,length=103838>\n+##contig=<ID=chr22_KI270738v1_random,length=99375>\n+##contig=<ID=chr22_KI270739v1_random,length=73985>\n+##contig=<ID=chr2_KI270715v1_random,length=161471>\n+##contig=<ID=chr2_KI270716v1_random,length=153799>\n+##contig=<ID=chr3,length=198295559>\n+##contig=<ID=chr3_GL000221v1_random,length=155397>\n+##contig=<ID=chr4,length=190214555>\n+##contig=<ID=chr4_GL000008v2_random,length=209709>\n+##contig=<ID=chr5,length=181538259>\n+##contig=<ID=chr5_GL000208v1_random,length=92689>\n+##contig=<ID=chr6,length=170805979>\n+##contig=<ID=chr7,length=159345973>\n+##contig=<ID=chr8,length=145138636>\n+##contig=<ID=chr9,length=138394717>\n+##contig=<ID=chr9_KI270717v1_random,length=40062>\n+##contig=<ID=chr9_KI270718v1_random,length=38054>\n+##contig=<ID=chr9_KI270719v1_random,length=176845>\n+##contig=<ID=chr9_KI270720v1_random,length=39050>\n+##contig=<ID=chr1_KI270762v1_alt,length=354444>\n+##contig=<ID=chr1_KI270766v1_alt,length=256271>\n+##contig=<ID=chr1_KI270760v1_alt,length=109528>\n+##contig=<ID=chr1_KI270765v1_alt,length=185285>\n+##contig=<ID=chr1_GL383518v1_alt,length=182439>\n+##contig=<ID=chr1_GL383519v1_alt,length=110268>\n+##contig=<ID=chr1_GL383520v2_alt,length=366580>\n+##contig=<ID=chr1_KI270764v1_alt,length=50258>\n+##contig=<ID=chr1_KI270763v1_alt,length=911658>\n+##contig=<ID=chr1_KI270759v1_alt,length=425601>\n+##contig=<ID=chr1_KI270761v1_alt,length=165834>\n+##contig=<ID=chr2_KI270770v1_alt,length=136240>\n+##contig=<ID=chr2_KI270773v1_alt,length=70887>\n+##contig=<ID=chr2_KI270774v1_alt,length=223625>\n+##contig=<ID=chr2_KI270769v1_alt,length=120616>\n+##contig=<ID=chr2_GL383521v1_alt,length=143390>\n+##contig=<ID=chr2_KI270772v1_alt,length=133041>\n+##contig=<ID=chr2_KI270775v1_alt,length'..b'\n+##FORMAT=<ID=AO,Number=A,Type=Integer,Description="Alternate allele observation count">\n+##FORMAT=<ID=QA,Number=A,Type=Integer,Description="Sum of quality of the alternate observations">\n+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum depth in gVCF output block.">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown\n+chr1\t156701053\t.\tCT\tAG\t24.1256\t.\tAB=0.666667;ABP=3.73412;AC=1;AF=0.5;AN=2;AO=2;CIGAR=2X;DP=3;DPB=4;DPRA=0;EPP=7.35324;EPPR=5.18177;GTI=0;LEN=2;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=2.97054;PAIRED=0.5;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=2;QA=72;QR=38;RO=1;RPL=0;RPP=7.35324;RPPR=5.18177;RPR=2;RUN=1;SAF=2;SAP=7.35324;SAR=0;SRF=1;SRP=5.18177;SRR=0;TYPE=complex\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:3:1,2:1:38:2:72:-5.33122,0,-2.03897\n+chr12\t6561055\t.\tT\tC\t14340.8\t.\tAB=0.235264;ABP=2357.67;AC=1;AF=0.5;AN=2;AO=910;CIGAR=1X;DP=3868;DPB=3868;DPRA=0;EPP=7.21962;EPPR=59.3944;GTI=0;LEN=1;MEANALT=2;MQM=59.9352;MQMR=59.7205;NS=1;NUMALT=1;ODDS=3302.08;PAIRED=0.983516;PAIREDR=0.990186;PAO=0;PQA=0;PQR=0;PRO=0;QA=33224;QR=106496;RO=2955;RPL=107;RPP=1158.94;RPPR=4030.18;RPR=803;RUN=1;SAF=481;SAP=9.46268;SAR=429;SRF=1663;SRP=104.155;SRR=1292;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:3868:2955,910:2955:106496:910:33224:-1821.72,0,-8372.79\n+chr14\t102083954\t.\tC\tT\t240741\t.\tAB=0.203722;ABP=65019.6;AC=1;AF=0.5;AN=2;AO=17372;CIGAR=1X;DP=85273;DPB=85273;DPRA=0;EPP=2106.28;EPPR=12892.8;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=55432.6;PAIRED=0.982155;PAIREDR=0.980402;PAO=0;PQA=0;PQR=0;PRO=0;QA=637523;QR=2489236;RO=67865;RPL=5766;RPP=4266.16;RPPR=13677.6;RPR=11606;RUN=1;SAF=9063;SAP=74.0739;SAR=8309;SRF=35817;SRP=457.538;SRR=32048;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:85273:67865,17372:67865:2489236:17372:637523:-31680.6,0,-198225\n+chr19\t17205335\t.\tA\tT\t0.00158993\t.\tAB=0.285714;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=3.0103;EPPR=6.91895;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=48.2;NS=1;NUMALT=1;ODDS=7.91247;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=28;QR=169;RO=5;RPL=0;RPP=7.35324;RPPR=13.8677;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=1;SRP=6.91895;SRR=4;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:7:5,2:5:169:2:28:-0.55277,0,-10.4001\n+chr19\t17205973\t.\tT\tC\t12243.8\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=406;CIGAR=1X;DP=406;DPB=406;DPRA=0;EPP=14.3276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=58.4015;MQMR=0;NS=1;NUMALT=1;ODDS=567.441;PAIRED=0.985222;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=14833;QR=0;RO=0;RPL=368;RPP=585.457;RPPR=0;RPR=38;RUN=1;SAF=182;SAP=12.445;SAR=224;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:406:0,406:0:0:406:14833:-1297.85,-122.218,0\n+chr2\t231456129\t.\tGGATGGCT\tTGATTCAC\t54.047\t.\tAB=0.363636;ABP=4.78696;AC=1;AF=0.5;AN=2;AO=4;CIGAR=1X3M4X;DP=11;DPB=12.375;DPRA=0;EPP=3.0103;EPPR=5.80219;GTI=0;LEN=8;MEANALT=1;MQM=60;MQMR=51.5714;NS=1;NUMALT=1;ODDS=12.4448;PAIRED=1;PAIREDR=0.714286;PAO=0;PQA=0;PQR=134;PRO=4;QA=150;QR=252;RO=7;RPL=0;RPP=11.6962;RPPR=18.2106;RPR=4;RUN=1;SAF=2;SAP=3.0103;SAR=2;SRF=5;SRP=5.80219;SRR=2;TYPE=complex\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:11:7,4:7:252:4:150:-9.35048,0,-24.4716\n+chr1\t189392\t.\tACCC\tAC\t441.589\t.\tAB=0.294118;ABP=34.305;AC=1;AF=0.5;AN=2;AO=25;CIGAR=1M2D1M;DP=85;DPB=72.5;DPRA=0;EPP=10.0459;EPPR=17.4868;GTI=0;LEN=2;MEANALT=1;MQM=57.64;MQMR=11.8167;NS=1;NUMALT=1;ODDS=15.1558;PAIRED=1;PAIREDR=0.983333;PAO=0;PQA=0;PQR=0;PRO=0;QA=845;QR=2056;RO=60;RPL=11;RPP=3.79203;RPPR=79.5909;RPR=14;RUN=1;SAF=17;SAP=10.0459;SAR=8;SRF=19;SRP=20.5268;SRR=41;TYPE=del\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:85:60,25:60:2056:25:845:-47.6855,0,-14.332\n+chr1\t1758687\t.\tAACA\tAACACA\t483.954\t.\tAB=0.469388;ABP=3.40914;AC=1;AF=0.5;AN=2;AO=23;CIGAR=1M2I3M;DP=49;DPB=60.5;DPRA=0;EPP=44.6459;EPPR=15.0369;GTI=0;LEN=2;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=111.434;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=814;QR=931;RO=26;RPL=13;RPP=3.86001;RPPR=4.34659;RPR=10;RUN=1;SAF=9;SAP=5.3706;SAR=14;SRF=10;SRP=6.01695;SRR=16;TYPE=ins\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:49:26,23:26:931:23:814:-58.8184,0,-69.3497\n\\ No newline at end of file\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/Freebayes_two-variants.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Freebayes_two-variants.vcf Thu Aug 16 12:27:35 2018 -0400
b
b'@@ -0,0 +1,517 @@\n+##fileformat=VCFv4.2\t\t\t\t\t\t\t\t\t\r\n+##fileDate=20180518\t\t\t\t\t\t\t\t\t\r\n+##source=freeBayes v1.1.0-46-g8d2b3a0-dirty\t\t\t\t\t\t\t\t\t\r\n+##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1,length=248956422>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr10,length=133797422>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr11,length=135086622>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr11_KI270721v1_random,length=100316>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr12,length=133275309>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr13,length=114364328>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14,length=107043718>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_GL000009v2_random,length=201709>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_GL000225v1_random,length=211173>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_KI270722v1_random,length=194050>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_GL000194v1_random,length=191469>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_KI270723v1_random,length=38115>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_KI270724v1_random,length=39555>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_KI270725v1_random,length=172810>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr14_KI270726v1_random,length=43739>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr15,length=101991189>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr15_KI270727v1_random,length=448248>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr16,length=90338345>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr16_KI270728v1_random,length=1872759>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr17,length=83257441>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr17_GL000205v2_random,length=185591>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr17_KI270729v1_random,length=280839>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr17_KI270730v1_random,length=112551>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr18,length=80373285>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr19,length=58617616>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270706v1_random,length=175055>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270707v1_random,length=32032>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270708v1_random,length=127682>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270709v1_random,length=66860>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270710v1_random,length=40176>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270711v1_random,length=42210>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270712v1_random,length=176043>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270713v1_random,length=40745>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270714v1_random,length=41717>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr2,length=242193529>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr20,length=64444167>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr21,length=46709983>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22,length=50818468>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270731v1_random,length=150754>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270732v1_random,length=41543>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270733v1_random,length=179772>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270734v1_random,length=165050>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270735v1_random,length=42811>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270736v1_random,length=181920>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270737v1_random,length=103838>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270738v1_random,length=99375>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr22_KI270739v1_random,length=73985>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr2_KI270715v1_random,length=161471>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr2_KI270716v1_random,length=153799>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr3,length=198295559>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr3_GL000221v1_random,length=155397>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr4,length=190214555>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr4_GL000008v2_random,length=209709>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr5,length=181538259>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr5_GL000208v1_random,length=92689>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr6,length=170805979>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr7,length=159345973>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr8,length=145138636>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr9,length=138394717>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr9_KI270717v1_random,length=40062>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr9_KI270718v1_random,length=38054>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr9_KI270719v1_random,length=176845>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr9_KI270720v1_random,length=39050>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID=chr1_KI270762v1_alt,length=354444>"\t\t\t\t\t\t\t\t\t\r\n+"##contig=<ID'..b'+"##INFO=<ID=GTI,Number=1,Type=Integer,Description=""Number of genotyping iterations required to reach convergence or bailout."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=TYPE,Number=A,Type=String,Description=""The type of allele, either snp, mnp, ins, del, or complex."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=CIGAR,Number=A,Type=String,Description=""The extended CIGAR representation of each alternate allele, with the exception that \'=\' is replaced by \'M\' to ease VCF parsing.  Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=NUMALT,Number=1,Type=Integer,Description=""Number of unique non-reference alleles in called genotypes at this position."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=MEANALT,Number=A,Type=Float,Description=""Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=LEN,Number=A,Type=Integer,Description=""allele length"">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=MQM,Number=A,Type=Float,Description=""Mean mapping quality of observed alternate alleles"">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=MQMR,Number=1,Type=Float,Description=""Mean mapping quality of observed reference alleles"">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=PAIRED,Number=A,Type=Float,Description=""Proportion of observed alternate alleles which are supported by properly paired read fragments"">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=PAIREDR,Number=1,Type=Float,Description=""Proportion of observed reference alleles which are supported by properly paired read fragments"">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description=""Minimum depth in gVCF output block."">"\t\t\t\t\t\t\t\t\t\r\n+"##INFO=<ID=END,Number=1,Type=Integer,Description=""Last position (inclusive) in gVCF output record."">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=GT,Number=1,Type=String,Description=""Genotype"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=GQ,Number=1,Type=Float,Description=""Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=GL,Number=G,Type=Float,Description=""Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=DP,Number=1,Type=Integer,Description=""Read Depth"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=""Number of observation for each allele"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=RO,Number=1,Type=Integer,Description=""Reference allele observation count"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=QR,Number=1,Type=Integer,Description=""Sum of quality of the reference observations"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=AO,Number=A,Type=Integer,Description=""Alternate allele observation count"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=QA,Number=A,Type=Integer,Description=""Sum of quality of the alternate observations"">"\t\t\t\t\t\t\t\t\t\r\n+"##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description=""Minimum depth in gVCF output block."">"\t\t\t\t\t\t\t\t\t\r\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown\r\n+chr1\t156701052\t.\tC\tT\t122.853\t.\tAB=0.4;ABP=4.74748;AC=1;AF=0.5;AN=2;AO=8;CIGAR=1X;DP=20;DPB=20;DPRA=0;EPP=4.09604;EPPR=4.45795;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=11.0181;PAIRED=1;PAIREDR=0.833333;PAO=0;PQA=0;PQR=0;PRO=0;QA=255;QR=217;RO=6;RPL=0;RPP=20.3821;RPPR=4.45795;RPR=8;RUN=1;SAF=5;SAP=4.09604;SAR=3;SRF=6;SRP=16.0391;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t"0/1:20:6,8:6:217:8:255:-18.7505,0,-15.3493"\r\n+chr19\t18856059\t.\tC\tT\t10269.5\t.\tAB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=172.262;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1043.89;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t"0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92"\n\\ No newline at end of file\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/MCF7_proBed.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/MCF7_proBed.bed Thu Aug 16 12:27:35 2018 -0400
b
@@ -0,0 +1,17 @@
+chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - 94079127 94079178 0 1 51 0
+chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - 94079127 94079178 0 1 51 0
+chr19 18856027 18856078 EAIDSPVSFLVLHNQIR 255 + 18856027 18856078 0 1 51 0
+chr12 110339607 110339637 EWGSGSDILR 255 + 110339607 110339637 0 1 30 0
+chr12 110339607 110339637 EWGSGSDILR 255 + 110339607 110339637 0 1 30 0
+chr14 102083930 102083972 GVVDSENLPLNISR 255 - 102083930 102083972 0 1 42 0
+chr14 102083930 102083972 GVVDSENLPLNISR 255 - 102083930 102083972 0 1 42 0
+chr19 17205300 17206022 IQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK 255 + 17205300 17206022 0 2 36,63 0,659
+chr2 231457346 231457474 NSTWSDDSR 255 - 231457346 231457474 0 2 -168,195 0,-67
+chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - 82082586 82082643 0 1 57 0
+chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - 82082586 82082643 0 1 57 0
+chr12 6561014 6561056 STGVILANDANAER 255 - 6561014 6561056 0 1 42 0
+chr12 6561014 6561056 STGVILANDANAER 255 - 6561014 6561056 0 1 42 0
+chr12 6561014 6561056 STGVILANDANAER 255 - 6561014 6561056 0 1 42 0
+chr12 6561014 6561056 STGVILANDANAER 255 - 6561014 6561056 0 1 42 0
+chr2 231457113 231457124 TLQHVLGESK 255 - 231457113 231457124 0 2 -481,511 0,-500
+chr17 2711607 2711658 VIKTDELPAAAPADSAR 255 - 2711607 2711658 0 1 51 0
[
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/[PepPointer].bed
--- a/test-data/[PepPointer].bed Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,20 +0,0 @@
-chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - CDS
-chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - CDS
-chr14 102011973 102012027 ALESLEGVEGVAHIIDPK 255 + CDS
-chr19 18856027 18856078 EAIDSPVSFLVLHNQIR 255 + CDS
-chr12 110339607 110339637 EWGSGSDILR 255 + CDS
-chr12 110339607 110339637 EWGSGSDILR 255 + CDS
-chr14 102083930 102083972 GVVDSENLPLNISR 255 - CDS
-chr14 102083930 102083972 GVVDSENLPLNISR 255 - CDS
-chr19 17205300 17206022 IQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK 255 + SpliceJunction
-chr1 156705410 156705446 MPNFSGNWEIIR 255 - CDS
-chr1 156705410 156705446 MPNFSGNWEIIR 255 - CDS
-chr2 231457346 231457474 NSTWSDDSR 255 - SpliceJunction
-chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - CDS
-chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - CDS
-chr12 6561014 6561056 STGVILANDANAER 255 - CDS
-chr12 6561014 6561056 STGVILANDANAER 255 - CDS
-chr12 6561014 6561056 STGVILANDANAER 255 - CDS
-chr12 6561014 6561056 STGVILANDANAER 255 - CDS
-chr2 231457113 231457124 TLQHVLGESK 255 - SpliceJunction
-chr17 2711607 2711658 VIKTDELPAAAPADSAR 255 - CDS
[
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf
--- a/test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,526 +0,0 @@\n-##fileformat=VCFv4.2\n-##fileDate=20180504\n-##source=freeBayes v1.1.0-46-g8d2b3a0-dirty\n-##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\n-##contig=<ID=chr1,length=248956422>\n-##contig=<ID=chr10,length=133797422>\n-##contig=<ID=chr11,length=135086622>\n-##contig=<ID=chr11_KI270721v1_random,length=100316>\n-##contig=<ID=chr12,length=133275309>\n-##contig=<ID=chr13,length=114364328>\n-##contig=<ID=chr14,length=107043718>\n-##contig=<ID=chr14_GL000009v2_random,length=201709>\n-##contig=<ID=chr14_GL000225v1_random,length=211173>\n-##contig=<ID=chr14_KI270722v1_random,length=194050>\n-##contig=<ID=chr14_GL000194v1_random,length=191469>\n-##contig=<ID=chr14_KI270723v1_random,length=38115>\n-##contig=<ID=chr14_KI270724v1_random,length=39555>\n-##contig=<ID=chr14_KI270725v1_random,length=172810>\n-##contig=<ID=chr14_KI270726v1_random,length=43739>\n-##contig=<ID=chr15,length=101991189>\n-##contig=<ID=chr15_KI270727v1_random,length=448248>\n-##contig=<ID=chr16,length=90338345>\n-##contig=<ID=chr16_KI270728v1_random,length=1872759>\n-##contig=<ID=chr17,length=83257441>\n-##contig=<ID=chr17_GL000205v2_random,length=185591>\n-##contig=<ID=chr17_KI270729v1_random,length=280839>\n-##contig=<ID=chr17_KI270730v1_random,length=112551>\n-##contig=<ID=chr18,length=80373285>\n-##contig=<ID=chr19,length=58617616>\n-##contig=<ID=chr1_KI270706v1_random,length=175055>\n-##contig=<ID=chr1_KI270707v1_random,length=32032>\n-##contig=<ID=chr1_KI270708v1_random,length=127682>\n-##contig=<ID=chr1_KI270709v1_random,length=66860>\n-##contig=<ID=chr1_KI270710v1_random,length=40176>\n-##contig=<ID=chr1_KI270711v1_random,length=42210>\n-##contig=<ID=chr1_KI270712v1_random,length=176043>\n-##contig=<ID=chr1_KI270713v1_random,length=40745>\n-##contig=<ID=chr1_KI270714v1_random,length=41717>\n-##contig=<ID=chr2,length=242193529>\n-##contig=<ID=chr20,length=64444167>\n-##contig=<ID=chr21,length=46709983>\n-##contig=<ID=chr22,length=50818468>\n-##contig=<ID=chr22_KI270731v1_random,length=150754>\n-##contig=<ID=chr22_KI270732v1_random,length=41543>\n-##contig=<ID=chr22_KI270733v1_random,length=179772>\n-##contig=<ID=chr22_KI270734v1_random,length=165050>\n-##contig=<ID=chr22_KI270735v1_random,length=42811>\n-##contig=<ID=chr22_KI270736v1_random,length=181920>\n-##contig=<ID=chr22_KI270737v1_random,length=103838>\n-##contig=<ID=chr22_KI270738v1_random,length=99375>\n-##contig=<ID=chr22_KI270739v1_random,length=73985>\n-##contig=<ID=chr2_KI270715v1_random,length=161471>\n-##contig=<ID=chr2_KI270716v1_random,length=153799>\n-##contig=<ID=chr3,length=198295559>\n-##contig=<ID=chr3_GL000221v1_random,length=155397>\n-##contig=<ID=chr4,length=190214555>\n-##contig=<ID=chr4_GL000008v2_random,length=209709>\n-##contig=<ID=chr5,length=181538259>\n-##contig=<ID=chr5_GL000208v1_random,length=92689>\n-##contig=<ID=chr6,length=170805979>\n-##contig=<ID=chr7,length=159345973>\n-##contig=<ID=chr8,length=145138636>\n-##contig=<ID=chr9,length=138394717>\n-##contig=<ID=chr9_KI270717v1_random,length=40062>\n-##contig=<ID=chr9_KI270718v1_random,length=38054>\n-##contig=<ID=chr9_KI270719v1_random,length=176845>\n-##contig=<ID=chr9_KI270720v1_random,length=39050>\n-##contig=<ID=chr1_KI270762v1_alt,length=354444>\n-##contig=<ID=chr1_KI270766v1_alt,length=256271>\n-##contig=<ID=chr1_KI270760v1_alt,length=109528>\n-##contig=<ID=chr1_KI270765v1_alt,length=185285>\n-##contig=<ID=chr1_GL383518v1_alt,length=182439>\n-##contig=<ID=chr1_GL383519v1_alt,length=110268>\n-##contig=<ID=chr1_GL383520v2_alt,length=366580>\n-##contig=<ID=chr1_KI270764v1_alt,length=50258>\n-##contig=<ID=chr1_KI270763v1_alt,length=911658>\n-##contig=<ID=chr1_KI270759v1_alt,length=425601>\n-##contig=<ID=chr1_KI270761v1_alt,length=165834>\n-##contig=<ID=chr2_KI270770v1_alt,length=136240>\n-##contig=<ID=chr2_KI270773v1_alt,length=70887>\n-##contig=<ID=chr2_KI270774v1_alt,length=223625>\n-##contig=<ID=chr2_KI270769v1_alt,length=120616>\n-##contig=<ID=chr2_GL383521v1_alt,length=143390>\n-##contig=<ID=chr2_KI270772v1_alt,length=133041>\n-##contig=<ID=chr2_KI270775v1_alt,length'..b'N=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=11243.1;PAIRED=0.985288;PAIREDR=0.980156;PAO=0;PQA=0;PQR=0;PRO=0;QA=90595;QR=244569;RO=6652;RPL=1094;RPP=62.5381;RPPR=155.737;RPR=1353;RUN=1;SAF=1227;SAP=3.05378;SAR=1220;SRF=3354;SRP=4.03401;SRR=3298;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:9100:6652,2447:6652:244569:2447:90595:-5409.03,0,-19257.6\n-chr14\t94079142\t.\tT\tC\t0\t.\tAB=0;ABP=0;AC=0;AF=0;AN=2;AO=672;CIGAR=1X;DP=3227;DPB=3227;DPRA=0;EPP=117.219;EPPR=488.229;GTI=0;LEN=1;MEANALT=3;MQM=13.9062;MQMR=8.44728;NS=1;NUMALT=1;ODDS=1413.33;PAIRED=0.995536;PAIREDR=0.994512;PAO=0;PQA=0;PQR=0;PRO=0;QA=24358;QR=92741;RO=2551;RPL=349;RPP=5.1947;RPPR=253.993;RPR=323;RUN=1;SAF=301;SAP=18.844;SAR=371;SRF=1157;SRP=50.8227;SRR=1394;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/0:3227:2551,672:2551:92741:672:24358:0,-455.681,-680.616\n-chr14\t102011985\t.\tA\tT\t12962.3\t.\tAB=0.301267;ABP=734.044;AC=1;AF=0.5;AN=2;AO=642;CIGAR=1X;DP=2131;DPB=2131;DPRA=0;EPP=6.47383;EPPR=23.6897;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=2984.69;PAIRED=0.995327;PAIREDR=0.98117;PAO=0;PQA=0;PQR=0;PRO=0;QA=22490;QR=51005;RO=1487;RPL=362;RPP=25.7533;RPPR=15.6405;RPR=280;RUN=1;SAF=307;SAP=5.66207;SAR=335;SRF=729;SRP=4.23842;SRR=758;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:2131:1487,642:1487:51005:642:22490:-1382.22,0,-3947.19\n-chr14\t102083954\t.\tC\tT\t240809\t.\tAB=0.203741;ABP=65012.9;AC=1;AF=0.5;AN=2;AO=17374;CIGAR=1X;DP=85275;DPB=85275;DPRA=0;EPP=2106.04;EPPR=12892.8;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=55448.2;PAIRED=0.982387;PAIREDR=0.980402;PAO=0;PQA=0;PQR=0;PRO=0;QA=637599;QR=2489236;RO=67865;RPL=5766;RPP=4268.59;RPPR=13677.6;RPR=11608;RUN=1;SAF=9064;SAP=74.0657;SAR=8310;SRF=35817;SRP=457.538;SRR=32048;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:85275:67865,17374:67865:2489236:17374:637599:-31686.8,0,-198225\n-chr17\t82082606\t.\tC\tT\t10374.8\t.\tAB=0.202823;ABP=2937.89;AC=1;AF=0.5;AN=2;AO=776;CIGAR=1X;DP=3826;DPB=3826;DPRA=0;EPP=22.755;EPPR=126.853;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9806;NS=1;NUMALT=1;ODDS=2388.89;PAIRED=0.981959;PAIREDR=0.985569;PAO=0;PQA=0;PQR=0;PRO=0;QA=27982;QR=110520;RO=3049;RPL=338;RPP=30.9932;RPPR=253.452;RPR=438;RUN=1;SAF=422;SAP=15.9496;SAR=354;SRF=1759;SRP=159.665;SRR=1290;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:3826:3049,776:3049:110520:776:27982:-1365.58,0,-8788\n-chr19\t17205335\t.\tA\tT\t0.00158993\t.\tAB=0.285714;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=3.0103;EPPR=6.91895;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=48.2;NS=1;NUMALT=1;ODDS=7.91247;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=28;QR=169;RO=5;RPL=0;RPP=7.35324;RPPR=13.8677;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=1;SRP=6.91895;SRR=4;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:7:5,2:5:169:2:28:-0.55277,0,-10.4001\n-chr19\t17205444\t.\tT\tC\t206.198\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=7;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=5.80219;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=14.3092;PAIRED=0.857143;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=249;QR=0;RO=0;RPL=2;RPP=5.80219;RPPR=0;RPR=5;RUN=1;SAF=4;SAP=3.32051;SAR=3;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:7:0,7:0:0:7:249:-22.753,-2.10721,0\n-chr19\t17205973\t.\tT\tC\t12243.8\t.\tAB=0;ABP=0;AC=2;AF=1;AN=2;AO=406;CIGAR=1X;DP=406;DPB=406;DPRA=0;EPP=14.3276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=58.4015;MQMR=0;NS=1;NUMALT=1;ODDS=567.441;PAIRED=0.985222;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=14833;QR=0;RO=0;RPL=368;RPP=585.457;RPPR=0;RPR=38;RUN=1;SAF=182;SAP=12.445;SAR=224;SRF=0;SRP=0;SRR=0;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t1/1:406:0,406:0:0:406:14833:-1297.85,-122.218,0\n-chr19\t18856059\t.\tC\tT\t10269.5\t.\tAB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=174.082;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1048.39;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/error.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/error.tsv Thu Aug 16 12:27:35 2018 -0400
b
@@ -0,0 +1,12 @@
+#Input Errors Report
+#2018-08-13 15:36:32.358464
+#CRAVAT version: hybrid
+#Analysis done at http://www.cravat.us.
+#Job Id: rsajulga_20180813_113614
+#Input file: Freebayes_two_variants.vcf
+#This report shows errors that occurred in the input.
+#Input coordinate: hg38 genomic.
+#CHASM classifier: Breast
+#For more information on CRAVAT, visit http://www.cravat.us.
+
+Input line number$%$Input line UID$%$Gene$%$Error$%$Input Line
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/gene.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene.tsv Thu Aug 16 12:27:35 2018 -0400
b
@@ -0,0 +1,15 @@
+#Gene Level Annotation Report
+#2018-08-13 15:36:32.359533
+#CRAVAT version: hybrid
+#Analysis done at http://www.cravat.us.
+#Job Id: rsajulga_20180813_113614
+#Input file: Freebayes_two_variants.vcf
+#This report shows analysis results at gene level.
+#The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance.
+#hg38 genomic.
+#Breast
+#For more information on CRAVAT, visit http://www.cravat.us.
+
+HUGO symbol Number of variants Sequence ontology CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef Occurrences in COSMIC COSMIC gene count (tissue) Number of samples with gene mutated CHASM gene score CHASM gene p-value CHASM gene FDR VEST gene score (non-silent) VEST gene p-value VEST gene FDR Protein 3D gene Has a mutation in a TCGA Mutation Cluster NCI pathway hits NCI pathway IDs NCI pathway names TARGET CGL driver class
+CRABP2 1 MS 37 upper_aerodigestive_tract(3);large_intestine(9);stomach(4);soft_tissue(3);endometrium(4);lung(3);liver(2);skin(4);NS(1);prostate(1);bone(1);kidney(1);breast(1) 1 0.358 0.4176 1 ../MuPIT_Interactive?gm=chr1:156701052 0
+UPF1 1 MS 267 large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15) 1 0.63 0.0394 0.1 ../MuPIT_Interactive?gm=chr19:18856059 0
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/noncoding.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/noncoding.tsv Thu Aug 16 12:27:35 2018 -0400
b
@@ -0,0 +1,12 @@
+#Non-coding Variant Report
+#2018-08-13 15:36:32.354693
+#CRAVAT version: hybrid
+#Analysis done at http://www.cravat.us.
+#Job Id: rsajulga_20180813_113614
+#Input file: Freebayes_two_variants.vcf
+#This report shows analysis results at variant level.
+#hg38 genomic.
+#Breast
+#For more information on CRAVAT, visit http://www.cravat.us.
+
+Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change CHASM p-value CHASM FDR ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency CGL driver class
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/results/intersected_vcf.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/results/intersected_vcf.vcf Thu Aug 16 12:27:35 2018 -0400
b
b'@@ -0,0 +1,516 @@\n+##fileformat=VCFv4.2\r\n+##fileDate=20180518\r\n+##source=freeBayes  v1.1.0-46-g8d2b3a0-dirty\r\n+##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa\r\n+##contig=<ID=chr1,length=248956422>\r\n+##contig=<ID=chr10,length=133797422>\r\n+##contig=<ID=chr11,length=135086622>\r\n+##contig=<ID=chr11_KI270721v1_random,length=100316>\r\n+##contig=<ID=chr12,length=133275309>\r\n+##contig=<ID=chr13,length=114364328>\r\n+##contig=<ID=chr14,length=107043718>\r\n+##contig=<ID=chr14_GL000009v2_random,length=201709>\r\n+##contig=<ID=chr14_GL000225v1_random,length=211173>\r\n+##contig=<ID=chr14_KI270722v1_random,length=194050>\r\n+##contig=<ID=chr14_GL000194v1_random,length=191469>\r\n+##contig=<ID=chr14_KI270723v1_random,length=38115>\r\n+##contig=<ID=chr14_KI270724v1_random,length=39555>\r\n+##contig=<ID=chr14_KI270725v1_random,length=172810>\r\n+##contig=<ID=chr14_KI270726v1_random,length=43739>\r\n+##contig=<ID=chr15,length=101991189>\r\n+##contig=<ID=chr15_KI270727v1_random,length=448248>\r\n+##contig=<ID=chr16,length=90338345>\r\n+##contig=<ID=chr16_KI270728v1_random,length=1872759>\r\n+##contig=<ID=chr17,length=83257441>\r\n+##contig=<ID=chr17_GL000205v2_random,length=185591>\r\n+##contig=<ID=chr17_KI270729v1_random,length=280839>\r\n+##contig=<ID=chr17_KI270730v1_random,length=112551>\r\n+##contig=<ID=chr18,length=80373285>\r\n+##contig=<ID=chr19,length=58617616>\r\n+##contig=<ID=chr1_KI270706v1_random,length=175055>\r\n+##contig=<ID=chr1_KI270707v1_random,length=32032>\r\n+##contig=<ID=chr1_KI270708v1_random,length=127682>\r\n+##contig=<ID=chr1_KI270709v1_random,length=66860>\r\n+##contig=<ID=chr1_KI270710v1_random,length=40176>\r\n+##contig=<ID=chr1_KI270711v1_random,length=42210>\r\n+##contig=<ID=chr1_KI270712v1_random,length=176043>\r\n+##contig=<ID=chr1_KI270713v1_random,length=40745>\r\n+##contig=<ID=chr1_KI270714v1_random,length=41717>\r\n+##contig=<ID=chr2,length=242193529>\r\n+##contig=<ID=chr20,length=64444167>\r\n+##contig=<ID=chr21,length=46709983>\r\n+##contig=<ID=chr22,length=50818468>\r\n+##contig=<ID=chr22_KI270731v1_random,length=150754>\r\n+##contig=<ID=chr22_KI270732v1_random,length=41543>\r\n+##contig=<ID=chr22_KI270733v1_random,length=179772>\r\n+##contig=<ID=chr22_KI270734v1_random,length=165050>\r\n+##contig=<ID=chr22_KI270735v1_random,length=42811>\r\n+##contig=<ID=chr22_KI270736v1_random,length=181920>\r\n+##contig=<ID=chr22_KI270737v1_random,length=103838>\r\n+##contig=<ID=chr22_KI270738v1_random,length=99375>\r\n+##contig=<ID=chr22_KI270739v1_random,length=73985>\r\n+##contig=<ID=chr2_KI270715v1_random,length=161471>\r\n+##contig=<ID=chr2_KI270716v1_random,length=153799>\r\n+##contig=<ID=chr3,length=198295559>\r\n+##contig=<ID=chr3_GL000221v1_random,length=155397>\r\n+##contig=<ID=chr4,length=190214555>\r\n+##contig=<ID=chr4_GL000008v2_random,length=209709>\r\n+##contig=<ID=chr5,length=181538259>\r\n+##contig=<ID=chr5_GL000208v1_random,length=92689>\r\n+##contig=<ID=chr6,length=170805979>\r\n+##contig=<ID=chr7,length=159345973>\r\n+##contig=<ID=chr8,length=145138636>\r\n+##contig=<ID=chr9,length=138394717>\r\n+##contig=<ID=chr9_KI270717v1_random,length=40062>\r\n+##contig=<ID=chr9_KI270718v1_random,length=38054>\r\n+##contig=<ID=chr9_KI270719v1_random,length=176845>\r\n+##contig=<ID=chr9_KI270720v1_random,length=39050>\r\n+##contig=<ID=chr1_KI270762v1_alt,length=354444>\r\n+##contig=<ID=chr1_KI270766v1_alt,length=256271>\r\n+##contig=<ID=chr1_KI270760v1_alt,length=109528>\r\n+##contig=<ID=chr1_KI270765v1_alt,length=185285>\r\n+##contig=<ID=chr1_GL383518v1_alt,length=182439>\r\n+##contig=<ID=chr1_GL383519v1_alt,length=110268>\r\n+##contig=<ID=chr1_GL383520v2_alt,length=366580>\r\n+##contig=<ID=chr1_KI270764v1_alt,length=50258>\r\n+##contig=<ID=chr1_KI270763v1_alt,length=911658>\r\n+##contig=<ID=chr1_KI270759v1_alt,length=425601>\r\n+##contig=<ID=chr1_KI270761v1_alt,length=165834>\r\n+##contig=<ID=chr2_KI270770v1_alt,length=136240>\r\n+##contig=<ID=chr2_KI270773v1_alt,length=70887>\r\n+##contig=<ID=chr2_KI270774v1_alt,length=223625>\r\n+##contig=<ID=chr2_KI270769v1_alt,length=120616>\r\n+##contig=<ID=chr2_GL383521v1_alt,length=143390>\r\n+##co'..b'caled  upper-bounds  estimate  of  the  probability  of  observing  the  deviation  between  EL  and  ER  given  E(EL/ER)  ~  0.5,  derived  using  Hoeffding\'s  inequality ">\r\n+##INFO=<ID=DPRA,Number=A,Type=Float,Description= "Alternate  allele  depth  ratio.    Ratio  between  depth  in  samples  with  each  called  alternate  allele  and  those  without. ">\r\n+##INFO=<ID=ODDS,Number=1,Type=Float,Description= "The  log  odds  ratio  of  the  best  genotype  combination  to  the  second-best. ">\r\n+##INFO=<ID=GTI,Number=1,Type=Integer,Description= "Number  of  genotyping  iterations  required  to  reach  convergence  or  bailout. ">\r\n+##INFO=<ID=TYPE,Number=A,Type=String,Description= "The  type  of  allele,  either  snp,  mnp,  ins,  del,  or  complex. ">\r\n+##INFO=<ID=CIGAR,Number=A,Type=String,Description= "The  extended  CIGAR  representation  of  each  alternate  allele,  with  the  exception  that  \'=\'  is  replaced  by  \'M\'  to  ease  VCF  parsing.    Note  that  INDEL  alleles  do  not  have  the  first  matched  base  (which  is  provided  by  default,  per  the  spec)  referred  to  by  the  CIGAR. ">\r\n+##INFO=<ID=NUMALT,Number=1,Type=Integer,Description= "Number  of  unique  non-reference  alleles  in  called  genotypes  at  this  position. ">\r\n+##INFO=<ID=MEANALT,Number=A,Type=Float,Description= "Mean  number  of  unique  non-reference  allele  observations  per  sample  with  the  corresponding  alternate  alleles. ">\r\n+##INFO=<ID=LEN,Number=A,Type=Integer,Description= "allele  length ">\r\n+##INFO=<ID=MQM,Number=A,Type=Float,Description= "Mean  mapping  quality  of  observed  alternate  alleles ">\r\n+##INFO=<ID=MQMR,Number=1,Type=Float,Description= "Mean  mapping  quality  of  observed  reference  alleles ">\r\n+##INFO=<ID=PAIRED,Number=A,Type=Float,Description= "Proportion  of  observed  alternate  alleles  which  are  supported  by  properly  paired  read  fragments ">\r\n+##INFO=<ID=PAIREDR,Number=1,Type=Float,Description= "Proportion  of  observed  reference  alleles  which  are  supported  by  properly  paired  read  fragments ">\r\n+##INFO=<ID=MIN_DP,Number=1,Type=Integer,Description= "Minimum  depth  in  gVCF  output  block. ">\r\n+##INFO=<ID=END,Number=1,Type=Integer,Description= "Last  position  (inclusive)  in  gVCF  output  record. ">\r\n+##FORMAT=<ID=GT,Number=1,Type=String,Description= "Genotype ">\r\n+##FORMAT=<ID=GQ,Number=1,Type=Float,Description= "Genotype  Quality,  the  Phred-scaled  marginal  (or  unconditional)  probability  of  the  called  genotype ">\r\n+##FORMAT=<ID=GL,Number=G,Type=Float,Description= "Genotype  Likelihood,  log10-scaled  likelihoods  of  the  data  given  the  called  genotype  for  each  possible  genotype  generated  from  the  reference  and  alternate  alleles  given  the  sample  ploidy ">\r\n+##FORMAT=<ID=DP,Number=1,Type=Integer,Description= "Read  Depth ">\r\n+##FORMAT=<ID=AD,Number=R,Type=Integer,Description= "Number  of  observation  for  each  allele ">\r\n+##FORMAT=<ID=RO,Number=1,Type=Integer,Description= "Reference  allele  observation  count ">\r\n+##FORMAT=<ID=QR,Number=1,Type=Integer,Description= "Sum  of  quality  of  the  reference  observations ">\r\n+##FORMAT=<ID=AO,Number=A,Type=Integer,Description= "Alternate  allele  observation  count ">\r\n+##FORMAT=<ID=QA,Number=A,Type=Integer,Description= "Sum  of  quality  of  the  alternate  observations ">\r\n+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description= "Minimum  depth  in  gVCF  output  block. ">\r\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tunknown\r\n+chr19\t18856059\t.\tC\tT\t10269.5\t.\tAB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=172.262;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1043.89;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp\tGT:DP:AD:RO:QR:AO:QA:GL\t0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92\r\n'
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-data/variant.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/variant.tsv Thu Aug 16 12:27:35 2018 -0400
b
@@ -0,0 +1,13 @@
+#Variant Report
+#2018-08-13 15:36:32.354483
+#CRAVAT version: hybrid
+#Analysis done at http://www.cravat.us.
+#Job Id: rsajulga_20180813_113614
+#Input file: Freebayes_two_variants.vcf
+#This report shows analysis results at variant level.
+#hg38 genomic.
+#Breast
+#For more information on CRAVAT, visit http://www.cravat.us.
+Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change Reference peptide Variant peptide CHASM p-value CHASM FDR ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency CGL driver class S.O. transcript S.O. transcript strand S.O. all transcripts CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline CHASM transcript CHASM score All transcripts CHASM results ClinVar disease identifier ClinVar XRef COSMIC transcript COSMIC protein change COSMIC variant count ESP6500 AF (European American) ESP6500 AF (African American) HGVS Genomic HGVS Protein HGVS Protein All NCI pathway hits NCI pathway IDs NCI pathway names
+1 VAR516_unknown chr1 156701052 + C T unknown CRABP2 MS G24E 0.4176 1 0.0 4.07800406169e-06 2.98044825942e-05 ../MuPIT_Interactive?gm=chr1:156701052 0 122.853 . het 8 20 0.4 ENST00000368221.1 - *ENST00000368221.1:G24E(MS),ENST00000621784.4:G24E(MS),ENST00000368222.7:G24E(MS) ENST00000368221.1 0.358 *ENST00000368221.1:G24E(0.358:0.4176),ENST00000368222.7:G24E(0.358:0.4176),ENST00000621784.4:G24E(0.358:0.4176) 0 0 NC_000001.10:g.156701052C>T ENST00000368221.1:p.Gly24Glu *ENST00000368221.1:p.Gly24Glu,ENST00000621784.4:p.Gly24Glu,ENST00000368222.7:p.Gly24Glu 0
+2 VAR517_unknown chr19 18856059 + C T unknown UPF1 MS A571V EAIDSPVSFLALHNQIR EAIDSPVSFLVLHNQIR 0.0394 COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . het 592 2379 0.248844052123 ENST00000599848.5 + ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803.9 0.63 *ENST00000599848.5:A571V(0.61:0.0530),ENST00000262803.9:A560V(0.63:0.0394) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/Additional_Details.tsv
--- a/test-results/Additional_Details.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-#Variant Additional Details Report
-#2018-05-18 15:15:25.120629
-#CRAVAT version: hybrid
-#Analysis done at http://www.cravat.us.
-#Job Id: znylund_20180518_111521
-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf
-#This report shows analysis results at variant level.
-#hg38 genomic.
-#N/A
-#For more information on CRAVAT, visit http://www.cravat.us.
-
-Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology S.O. transcript S.O. transcript strand Protein sequence change S.O. all transcripts CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef COSMIC transcript COSMIC protein change COSMIC variant count ESP6500 AF (European American) ESP6500 AF (African American) HGVS Genomic HGVS Protein HGVS Protein All NCI pathway hits NCI pathway IDs NCI pathway names VEST score transcript VEST p-value VEST score (missense) VEST score (frameshift indels) VEST score (inframe indels) VEST score (stop-gain) VEST score (stop-loss) VEST score (splice site) All transcripts VEST results
-1 VAR516_unknown chr1 156705422 + T C unknown CRABP2 MS ENST00000368221.1 - K9E *ENST00000368221.1:K9E(MS),ENST00000621784.4:K9E(MS),ENST00000368222.7:K9E(MS) ENST00000368221 p.K9E (large_intestine 1) 1 0 0 NC_000001.10:g.156705422T>C ENST00000368221.1:p.Lys9Glu *ENST00000368221.1:p.Lys9Glu,ENST00000621784.4:p.Lys9Glu,ENST00000368222.7:p.Lys9Glu 0 ENST00000368221.1:K9E 0.53061 0.2 *ENST00000368221.1:K9E(0.2:0.53061),ENST00000368222.7:K9E(0.187:0.5543),ENST00000621784.4:K9E(0.186:0.55652)
-2 VAR517_unknown chr12 6561055 + T C unknown NOP2 MS ENST00000616948.4 - N408S ENST00000617555.4:N404S(MS),ENST00000545200.5:N404S(MS),ENST00000541778.5:N404S(MS),ENST00000399466.6:N404S(MS),ENST00000322166.9:N408S(MS),ENST00000537442.5:N408S(MS),ENST00000382421.7:N441S(MS),ENST00000620535.4:N441S(MS),*ENST00000616948.4:N408S(MS) 0 0 NC_000012.10:g.6561055T>C ENST00000616948.4:p.Asn408Ser ENST00000617555.4:p.Asn404Ser,ENST00000545200.5:p.Asn404Ser,ENST00000541778.5:p.Asn404Ser,ENST00000399466.6:p.Asn404Ser,ENST00000322166.9:p.Asn408Ser,ENST00000537442.5:p.Asn408Ser,ENST00000382421.7:p.Asn441Ser,ENST00000620535.4:p.Asn441Ser,*ENST00000616948.4:p.Asn408Ser 0 ENST00000616948.4:N408S 0.00324 0.958 ENST00000617555.4:N404S(0.954:0.00354),*ENST00000616948.4:N408S(0.958:0.00324),ENST00000541778.5:N404S(0.869:0.01488),ENST00000537442.5:N408S(0.956:0.00344),ENST00000399466.6:N404S(0.951:0.00374),ENST00000545200.5:N404S(0.953:0.00354),ENST00000620535.4:N441S(0.938:0.00536),ENST00000382421.7:N441S(0.938:0.00536),ENST00000322166.9:N408S(0.954:0.00354)
-3 VAR518_unknown chr12 110339630 + C T unknown ATP2A2 MS ENST00000539276.6 + T557I ENST00000308664.10:T557I(MS),*ENST00000539276.6:T557I(MS) 0 0 NC_000012.10:g.110339630C>T ENST00000539276.6:p.Thr557Ile ENST00000308664.10:p.Thr557Ile,*ENST00000539276.6:p.Thr557Ile 0 ENST00000539276.6:T557I 0.00374 0.951 ENST00000308664.10:T557I(0.822:0.02459),*ENST00000539276.6:T557I(0.951:0.00374)
-4 VAR520_unknown chr14 102011985 + A T unknown DYNC1H1 MS ENST00000360184.8 + R2243S *ENST00000360184.8:R2243S(MS) ENST00000360184 p.R2243S (large_intestine 1) 1 0 0 NC_000014.10:g.102011985A>T ENST00000360184.8:p.Arg2243Ser *ENST00000360184.8:p.Arg2243Ser 1 94da5dd8-5521-11e7-8f50-0ac135e8bacf Lissencephaly gene (LIS1) in neuronal migration and development ENST00000360184.8:R2243S 0.02307 0.829 *ENST00000360184.8:R2243S(0.829:0.02307)
-5 VAR521_unknown chr14 102083954 + C T unknown HSP90AA1 MS ENST00000334701.11 - D515N ENST00000216281.12:D393N(MS),*ENST00000334701.11:D515N(MS) somatic NHL ENST00000334701 p.D515N (large_intestine 1) 1 0 0 NC_000014.10:g.102083954C>T ENST00000334701.11:p.Asp515Asn ENST00000216281.12:p.Asp393Asn,*ENST00000334701.11:p.Asp515Asn 15 3814fa62-5521-11e7-8f50-0ac135e8bacf,32ff1916-5521-11e7-8f50-0ac135e8bacf,a8411a5c-5521-11e7-8f50-0ac135e8bacf,541d7e20-5521-11e7-8f50-0ac135e8bacf,98ad85f0-5521-11e7-8f50-0ac135e8bacf,9697501e-5521-11e7-8f50-0ac135e8bacf,e6f69242-5521-11e7-8f50-0ac135e8bacf,603902ca-5521-11e7-8f50-0ac135e8bacf,bb3d7c4a-5521-11e7-8f50-0ac135e8bacf,b1ac7318-5521-11e7-8f50-0ac135e8bacf,cb348d72-5521-11e7-8f50-0ac135e8bacf,945aa686-5521-11e7-8f50-0ac135e8bacf,4c90f780-5521-11e7-8f50-0ac135e8bacf,e4e93610-5521-11e7-8f50-0ac135e8bacf,6f7a316e-5521-11e7-8f50-0ac135e8bacf Validated targets of C-MYC transcriptional activation@VEGFR1 specific signals@IL2 signaling events mediated by PI3K@Signaling events mediated by HDAC Class II@Integrin-linked kinase signaling@Integrins in angiogenesis@Class I PI3K signaling events mediated by Akt@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity ENST00000216281.12:D393N 0.02014 0.84 ENST00000334701.11:D515N(0.749:0.04989),*ENST00000216281.12:D393N(0.84:0.02014)
-6 VAR522_unknown chr17 82082606 + C T unknown FASN MS ENST00000306749.3 - S1947N ENST00000634990.1:S1945N(MS),*ENST00000306749.3:S1947N(MS) ENST00000306749 p.S1947N (large_intestine 1) 1 0 0 NC_000017.10:g.82082606C>T ENST00000306749.3:p.Ser1947Asn ENST00000634990.1:p.Ser1945Asn,*ENST00000306749.3:p.Ser1947Asn 2 34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf Validated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network ENST00000634990.1:S1945N 0.18611 0.501 ENST00000306749.3:S1947N(0.493:0.19016),*ENST00000634990.1:S1945N(0.501:0.18611)
-7 VAR523_unknown chr19 17205335 + A T unknown MYO9B MS ENST00000594824.5 + K1688M ENST00000397274.6:K1688M(MS),ENST00000595618.5:K1688M(MS),*ENST00000594824.5:K1688M(MS) 0 0 NC_000019.10:g.17205335A>T ENST00000594824.5:p.Lys1688Met ENST00000397274.6:p.Lys1688Met,ENST00000595618.5:p.Lys1688Met,*ENST00000594824.5:p.Lys1688Met 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:K1688M 0.20028 0.473 ENST00000594824.5:K1688M(0.464:0.20464),*ENST00000397274.6:K1688M(0.473:0.20028),ENST00000595618.5:K1688M(0.469:0.20231)
-9 VAR525_unknown chr19 17205973 + T C unknown MYO9B MS ENST00000594824.5 + V1693A ENST00000397274.6:V1693A(MS),ENST00000595618.5:V1693A(MS),*ENST00000594824.5:V1693A(MS) ENST00000319396 p.V1693A (thyroid 2) 2 0.399857 0.645631 NC_000019.10:g.17205973T>C ENST00000594824.5:p.Val1693Ala ENST00000397274.6:p.Val1693Ala,ENST00000595618.5:p.Val1693Ala,*ENST00000594824.5:p.Val1693Ala 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:V1693A 0.95254 0.045 ENST00000594824.5:V1693A(0.025:0.98158),*ENST00000397274.6:V1693A(0.045:0.95254),ENST00000595618.5:V1693A(0.042:0.95749)
-10 VAR526_unknown chr19 18856059 + C T unknown UPF1 MS ENST00000599848.5 + A571V ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0 ENST00000262803.9:A560V 0.09372 0.662 ENST00000599848.5:A571V(0.643:0.10292),*ENST00000262803.9:A560V(0.662:0.09372)
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/Gene_Level_Analysis.tsv
--- a/test-results/Gene_Level_Analysis.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-#Gene Level Annotation Report
-#2018-05-18 15:18:04.023450
-#CRAVAT version: hybrid
-#Analysis done at http://www.cravat.us.
-#Job Id: znylund_20180518_111800
-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf
-#This report shows analysis results at gene level.
-#The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance.
-#hg38 genomic.
-#N/A
-#For more information on CRAVAT, visit http://www.cravat.us.
-
-HUGO symbol Number of variants Sequence ontology CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef Occurrences in COSMIC COSMIC gene count (tissue) Number of samples with gene mutated CHASM gene score CHASM gene p-value CHASM gene FDR VEST gene score (non-silent) VEST gene p-value VEST gene FDR Protein 3D gene Has a mutation in a TCGA Mutation Cluster NCI pathway hits NCI pathway IDs NCI pathway names TARGET CGL driver class
-CRABP2 1 MS 37 upper_aerodigestive_tract(3);large_intestine(9);stomach(4);soft_tissue(3);endometrium(4);lung(3);liver(2);skin(4);NS(1);prostate(1);bone(1);kidney(1);breast(1) 1 0.2 0.53061 0.65 ../MuPIT_Interactive?gm=chr1:156705422 0
-NOP2 1 MS 133 large_intestine(22);endometrium(8);lung(8);skin(20);kidney(5);thyroid(1);cervix(2);central_nervous_system(3);oesophagus(4);NS(2);upper_aerodigestive_tract(5);stomach(8);soft_tissue(1);urinary_tract(4);breast(12);prostate(5);pituitary(1);pancreas(5);adrenal_gland(1);haematopoietic_and_lymphoid_tissue(2);ovary(5);liver(9) 1 0.958 0.00324 0.05 ../MuPIT_Interactive?gm=chr12:6561055 0
-ATP2A2 1 MS C0022595 OMIM:124200 200 large_intestine(31);endometrium(12);lung(7);skin(43);autonomic_ganglia(1);kidney(9);thyroid(3);cervix(2);testis(1);oesophagus(4);NS(2);upper_aerodigestive_tract(6);biliary_tract(6);stomach(14);soft_tissue(2);urinary_tract(8);breast(7);prostate(8);pancreas(6);small_intestine(2);haematopoietic_and_lymphoid_tissue(5);ovary(3);bone(2);liver(16) 1 0.951 0.00374 0.05 ../MuPIT_Interactive?gm=chr12:110339630 0
-DYNC1H1 1 MS C1834690 OMIM:158600 955 large_intestine(151);pleura(1);endometrium(79);lung(62);skin(151);autonomic_ganglia(1);kidney(27);thyroid(8);cervix(13);testis(1);central_nervous_system(12);oesophagus(28);NS(19);upper_aerodigestive_tract(31);biliary_tract(6);stomach(60);soft_tissue(13);urinary_tract(25);breast(50);prostate(38);pancreas(18);adrenal_gland(3);meninges(1);small_intestine(3);haematopoietic_and_lymphoid_tissue(37);ovary(20);bone(4);liver(93) 1 0.829 0.02307 0.1 ../MuPIT_Interactive?gm=chr14:102011985 1 94da5dd8-5521-11e7-8f50-0ac135e8bacf Lissencephaly gene (LIS1) in neuronal migration and development
-HSP90AA1 1 MS somatic NHL 174 large_intestine(19);endometrium(9);lung(9);skin(25);kidney(12);thyroid(2);cervix(4);central_nervous_system(2);oesophagus(9);NS(5);biliary_tract(2);stomach(16);soft_tissue(5);urinary_tract(15);liver(11);prostate(3);pancreas(1);salivary_gland(1);haematopoietic_and_lymphoid_tissue(5);ovary(8);bone(1);breast(10) 1 0.84 0.02014 0.1 ../MuPIT_Interactive?gm=chr14:102083954 15 3814fa62-5521-11e7-8f50-0ac135e8bacf,32ff1916-5521-11e7-8f50-0ac135e8bacf,a8411a5c-5521-11e7-8f50-0ac135e8bacf,541d7e20-5521-11e7-8f50-0ac135e8bacf,98ad85f0-5521-11e7-8f50-0ac135e8bacf,9697501e-5521-11e7-8f50-0ac135e8bacf,e6f69242-5521-11e7-8f50-0ac135e8bacf,603902ca-5521-11e7-8f50-0ac135e8bacf,bb3d7c4a-5521-11e7-8f50-0ac135e8bacf,b1ac7318-5521-11e7-8f50-0ac135e8bacf,cb348d72-5521-11e7-8f50-0ac135e8bacf,945aa686-5521-11e7-8f50-0ac135e8bacf,4c90f780-5521-11e7-8f50-0ac135e8bacf,e4e93610-5521-11e7-8f50-0ac135e8bacf,6f7a316e-5521-11e7-8f50-0ac135e8bacf Validated targets of C-MYC transcriptional activation@VEGFR1 specific signals@IL2 signaling events mediated by PI3K@Signaling events mediated by HDAC Class II@Integrin-linked kinase signaling@Integrins in angiogenesis@Class I PI3K signaling events mediated by Akt@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity
-FASN 1 MS 621 large_intestine(163);endometrium(22);lung(19);skin(82);autonomic_ganglia(1);kidney(9);thyroid(7);cervix(9);central_nervous_system(7);genital_tract(1);oesophagus(24);NS(13);upper_aerodigestive_tract(21);biliary_tract(12);stomach(39);soft_tissue(9);urinary_tract(2);liver(79);prostate(20);pancreas(14);adrenal_gland(2);salivary_gland(3);small_intestine(4);haematopoietic_and_lymphoid_tissue(14);ovary(3);bone(5);breast(37) 1 0.501 0.18611 0.25 ../MuPIT_Interactive?gm=chr17:82082606 2 34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf Validated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network
-MYO9B 2 MS C1857847 OMIM:609753 424 large_intestine(96);endometrium(33);lung(17);skin(57);kidney(12);thyroid(9);cervix(4);central_nervous_system(9);oesophagus(22);NS(8);upper_aerodigestive_tract(22);biliary_tract(9);stomach(24);soft_tissue(10);urinary_tract(8);breast(17);prostate(14);pancreas(15);adrenal_gland(4);haematopoietic_and_lymphoid_tissue(8);ovary(1);bone(4);liver(21) 1 0.473 0.721215732958585 1 ../MuPIT_Interactive?gm=chr19:17205335,chr19:17205973 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity
-UPF1 1 MS 267 large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15) 1 0.662 0.09372 0.15 ../MuPIT_Interactive?gm=chr19:18856059
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/Input_Errors.Result.tsv
--- a/test-results/Input_Errors.Result.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,12 +0,0 @@
-#Input Errors Report
-#2018-05-18 15:18:04.022947
-#CRAVAT version: hybrid
-#Analysis done at http://www.cravat.us.
-#Job Id: znylund_20180518_111800
-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf
-#This report shows errors that occurred in the input.
-#Input coordinate: hg38 genomic.
-#CHASM classifier: N/A
-#For more information on CRAVAT, visit http://www.cravat.us.
-
-Input line number$%$Input line UID$%$Gene$%$Error$%$Input Line
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/Variant_Non-coding.Result.tsv
--- a/test-results/Variant_Non-coding.Result.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,13 +0,0 @@
-#Non-coding Variant Report
-#2018-05-18 15:18:04.020642
-#CRAVAT version: hybrid
-#Analysis done at http://www.cravat.us.
-#Job Id: znylund_20180518_111800
-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf
-#This report shows analysis results at variant level.
-#hg38 genomic.
-#N/A
-#For more information on CRAVAT, visit http://www.cravat.us.
-
-Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency VEST p-value VEST FDR CGL driver class
-8 VAR524_unknown chr19 17205444 + T C unknown Non-Coding 1 rs2305763 0.0 0.510502431118 0.684095610205 0.693317422434 0.337748344371 0.795930949445 0.435243553009 0.393043827905 0.449691991786 0.629792 intron MYO9B ENST00000595618.5(intron),ENST00000594824.5(intron),ENST00000397274.6(intron) 206.198 . hom 14 7 2.0
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/Variant_Result.tsv
--- a/test-results/Variant_Result.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-#Variant Report
-#2018-05-18 15:15:25.119179
-#CRAVAT version: hybrid
-#Analysis done at http://www.cravat.us.
-#Job Id: znylund_20180518_111521
-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf
-#This report shows analysis results at variant level.
-#hg38 genomic.
-#N/A
-#For more information on CRAVAT, visit http://www.cravat.us.
-
-Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency VEST p-value VEST FDR CGL driver class
-1 VAR516_unknown chr1 156705422 + T C unknown CRABP2 MS K9E COSM1984142 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr1:156705422 0 125181 . het 5739 18018 0.318514818515 0.53061
-2 VAR517_unknown chr12 6561055 + T C unknown NOP2 MS N408S 1 0.0 ../MuPIT_Interactive?gm=chr12:6561055 0 14340.8 . het 910 3868 0.235263702172 0.00324
-3 VAR518_unknown chr12 110339630 + C T unknown ATP2A2 MS T557I 1 0.0 ../MuPIT_Interactive?gm=chr12:110339630 0 48828.1 . het 2447 9100 0.268901098901 0.00374
-4 VAR520_unknown chr14 102011985 + A T unknown DYNC1H1 MS R2243S COSM2262213 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102011985 0 12962.3 . het 642 2131 0.301267010793 0.02307
-5 VAR521_unknown chr14 102083954 + C T unknown HSP90AA1 MS D515N COSM2262393 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102083954 0 240809 . het 17374 85275 0.203740838464 0.02014
-6 VAR522_unknown chr17 82082606 + C T unknown FASN MS S1947N COSM4648107 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr17:82082606 0 10374.8 . het 776 3826 0.202822791427 0.18611
-7 VAR523_unknown chr19 17205335 + A T unknown MYO9B MS K1688M 1 0.0 ../MuPIT_Interactive?gm=chr19:17205335 0 0.00158993 . het 2 7 0.285714285714 0.20028
-9 VAR525_unknown chr19 17205973 + T C unknown MYO9B MS V1693A COSM438878 thyroid(2) 1 rs7248508 0.522744 0.526958747465 0.685404424473 0.728029336735 0.352247807018 0.776672496721 0.453231381586 0.406255640183 0.486462728551 0.596900776808 190609063506732,203395363506733,206865653506734,208819603506735,224846277709335,224792027709336 19060906,20339536,20686565,20881960,22484627,22479202 LDL cholesterol(0.0152),HDL cholesterol(0.0279),Triglycerides(0.0141),Height(0.0104),Obesity with early age of onset (age >2)(0.0471),Adiponectin levels(0.0294) ../MuPIT_Interactive?gm=chr19:17205973 0.631589 12243.8 . hom 812 406 2.0 0.95254
-10 VAR526_unknown chr19 18856059 + C T unknown UPF1 MS A571V COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . het 592 2379 0.248844052123 0.09372
b
diff -r 83181dabeb90 -r 2c7bcc1219fc test-results/combined_variants.tsv
--- a/test-results/combined_variants.tsv Fri May 18 13:25:29 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,21 +0,0 @@\n-#Variant Additional Details Report\r\n-#2018-05-18 15:15:25.120629\r\n-#CRAVAT version: hybrid\r\n-#Analysis done at http://www.cravat.us.\r\n-#Job Id: znylund_20180518_111521\r\n-#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf\r\n-#This report shows analysis results at variant level.\r\n-#hg38 genomic.\r\n-#N/A\r\n-#For more information on CRAVAT, visit http://www.cravat.us.\r\n-\r\n-Input line\tID\tChromosome\tPosition\tStrand\tReference base(s)\tAlternate base(s)\tSample ID\tHUGO symbol\tSequence ontology\tS.O. transcript\tS.O. transcript strand\tReference peptide\tVariant peptide\tProtein sequence change\tS.O. all transcripts\tCGC driver class\tCGC inheritance\tCGC tumor types somatic\tCGC tumor types germline\tClinVar disease identifier\tClinVar XRef\tCOSMIC transcript\tCOSMIC protein change\tCOSMIC variant count\tESP6500 AF (European American)\tESP6500 AF (African American)\tHGVS Genomic\tHGVS Protein\tHGVS Protein All\tNCI pathway hits\tNCI pathway IDs\tNCI pathway names\tVEST score transcript\tVEST p-value\tVEST score (missense)\tVEST score (frameshift indels)\tVEST score (inframe indels)\tVEST score (stop-gain)\tVEST score (stop-loss)\tVEST score (splice site)\tAll transcripts VEST results\tClinVar\tCOSMIC ID\tCOSMIC variant count (tissue)\tNumber of samples with variant\tdbSNP\tESP6500 AF (average)\tgnomAD AF Total\tgnomAD AF African\tgnomAD AF American\tgnomAD AF Ashkenazi Jewish\tgnomAD AF East Asian\tgnomAD AF Finnish\tgnomAD AF Non-Finnish European\tgnomAD AF Other\tgnomAD AF South Asian\tGWAS NHLBI Key (GRASP)\tGWAS PMID (GRASP)\tGWAS Phenotype (GRASP)\tProtein 3D variant\tIn TCGA Mutation Cluster\tncRNA Class\tncRNA Name\tPseudogene\tPseudogene Transcript\tRepeat Class\tRepeat Family\tRepeat Name\tTARGET\t1000 Genomes AF\tUTR/Intron\tUTR/Intron Gene\tUTR/Intron All Transcript\tPhred\tVCF filters\tZygosity\tAlternate reads\tTotal reads\tVariant allele frequency\tVEST FDR\tCGL driver class\r\n-1\tVAR516_unknown\tchr1\t156705422\t+\tT\tC\tunknown\tCRABP2\tMS\tENST00000368221.1\t-\tMPNFSGNWKIIR\tMPNFSGNWEIIR\tK9E\t*ENST00000368221.1:K9E(MS),ENST00000621784.4:K9E(MS),ENST00000368222.7:K9E(MS)\t\t\t\t\t\t\tENST00000368221\tp.K9E (large_intestine 1)\t1\t0\t0\tNC_000001.10:g.156705422T>C\tENST00000368221.1:p.Lys9Glu\t*ENST00000368221.1:p.Lys9Glu,ENST00000621784.4:p.Lys9Glu,ENST00000368222.7:p.Lys9Glu\t0\t\t\tENST00000368221.1:K9E\t0.53061\t0.2\t\t\t\t\t\t*ENST00000368221.1:K9E(0.2:0.53061),ENST00000368222.7:K9E(0.187:0.5543),ENST00000621784.4:K9E(0.186:0.55652)\t\tCOSM1984142\tlarge_intestine(1)\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr1:156705422\t\t\t\t\t\t\t\t\t\t0\t\t\t\t125181\t.\thet\t5739\t18018\t0.318514818515\t\t\r\n-2\tVAR517_unknown\tchr12\t6561055\t+\tT\tC\tunknown\tNOP2\tMS\tENST00000616948.4\t-\tNTGVILANDANAER\tSTGVILANDANAER\tN408S\tENST00000617555.4:N404S(MS),ENST00000545200.5:N404S(MS),ENST00000541778.5:N404S(MS),ENST00000399466.6:N404S(MS),ENST00000322166.9:N408S(MS),ENST00000537442.5:N408S(MS),ENST00000382421.7:N441S(MS),ENST00000620535.4:N441S(MS),*ENST00000616948.4:N408S(MS)\t\t\t\t\t\t\t\t\t\t0\t0\tNC_000012.10:g.6561055T>C\tENST00000616948.4:p.Asn408Ser\tENST00000617555.4:p.Asn404Ser,ENST00000545200.5:p.Asn404Ser,ENST00000541778.5:p.Asn404Ser,ENST00000399466.6:p.Asn404Ser,ENST00000322166.9:p.Asn408Ser,ENST00000537442.5:p.Asn408Ser,ENST00000382421.7:p.Asn441Ser,ENST00000620535.4:p.Asn441Ser,*ENST00000616948.4:p.Asn408Ser\t0\t\t\tENST00000616948.4:N408S\t0.00324\t0.958\t\t\t\t\t\tENST00000617555.4:N404S(0.954:0.00354),*ENST00000616948.4:N408S(0.958:0.00324),ENST00000541778.5:N404S(0.869:0.01488),ENST00000537442.5:N408S(0.956:0.00344),ENST00000399466.6:N404S(0.951:0.00374),ENST00000545200.5:N404S(0.953:0.00354),ENST00000620535.4:N441S(0.938:0.00536),ENST00000382421.7:N441S(0.938:0.00536),ENST00000322166.9:N408S(0.954:0.00354)\t\t\t\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr12:6561055\t\t\t\t\t\t\t\t\t\t0\t\t\t\t14340.8\t.\thet\t910\t3868\t0.235263702172\t\t\r\n-3\tVAR518_unknown\tchr12\t110339630\t+\tC\tT\tunknown\tATP2A2\tMS\tENST00000539276.6\t+\tEWGSGSDTLR\tEWGSGSDILR\tT557I\tENST00000308664.10:T557I(MS),*ENST00000539276.6:T557I(MS)\t\t\t\t\t\t\t\t\t\t0\t0\tNC_000012.10:g.110339630C>T\tENST00000539276.6:p.Thr557Ile\tENST0'..b't@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity\tENST00000216281.12:D393N\t0.02014\t0.84\t\t\t\t\t\tENST00000334701.11:D515N(0.749:0.04989),*ENST00000216281.12:D393N(0.84:0.02014)\t\tCOSM2262393\tlarge_intestine(1)\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr14:102083954\t\t\t\t\t\t\t\t\t\t0\t\t\t\t240809\t.\thet\t17374\t85275\t0.203740838464\t\t\r\n-6\tVAR522_unknown\tchr17\t82082606\t+\tC\tT\tunknown\tFASN\tMS\tENST00000306749.3\t-\tQGVQVQVSTSNISSLEGAR\tQGVQVQVSTSNINSLEGAR\tS1947N\tENST00000634990.1:S1945N(MS),*ENST00000306749.3:S1947N(MS)\t\t\t\t\t\t\tENST00000306749\tp.S1947N (large_intestine 1)\t1\t0\t0\tNC_000017.10:g.82082606C>T\tENST00000306749.3:p.Ser1947Asn\tENST00000634990.1:p.Ser1945Asn,*ENST00000306749.3:p.Ser1947Asn\t2\t34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf\tValidated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network\tENST00000634990.1:S1945N\t0.18611\t0.501\t\t\t\t\t\tENST00000306749.3:S1947N(0.493:0.19016),*ENST00000634990.1:S1945N(0.501:0.18611)\t\tCOSM4648107\tlarge_intestine(1)\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr17:82082606\t\t\t\t\t\t\t\t\t\t0\t\t\t\t10374.8\t.\thet\t776\t3826\t0.202822791427\t\t\r\n-7\tVAR523_unknown\tchr19\t17205335\t+\tA\tT\tunknown\tMYO9B\tMS\tENST00000594824.5\t+\tIQSHCSYTYGRKGEPGVEPGHFGVCVDSLTSDK\tIQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK\tK1688M\tENST00000397274.6:K1688M(MS),ENST00000595618.5:K1688M(MS),*ENST00000594824.5:K1688M(MS)\t\t\t\t\t\t\t\t\t\t0\t0\tNC_000019.10:g.17205335A>T\tENST00000594824.5:p.Lys1688Met\tENST00000397274.6:p.Lys1688Met,ENST00000595618.5:p.Lys1688Met,*ENST00000594824.5:p.Lys1688Met\t1\t60f3521c-5521-11e7-8f50-0ac135e8bacf\tRegulation of RhoA activity\tENST00000397274.6:K1688M\t0.20028\t0.473\t\t\t\t\t\tENST00000594824.5:K1688M(0.464:0.20464),*ENST00000397274.6:K1688M(0.473:0.20028),ENST00000595618.5:K1688M(0.469:0.20231)\t\t\t\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr19:17205335\t\t\t\t\t\t\t\t\t\t0\t\t\t\t0.00158993\t.\thet\t2\t7\t0.285714285714\t\t\r\n-9\tVAR525_unknown\tchr19\t17205973\t+\tT\tC\tunknown\tMYO9B\tMS\tENST00000594824.5\t+\tIQSHCSYTYGRKGEPGVEPGHFGVCVDSLTSDK\tIQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK\tV1693A\tENST00000397274.6:V1693A(MS),ENST00000595618.5:V1693A(MS),*ENST00000594824.5:V1693A(MS)\t\t\t\t\t\t\tENST00000319396\tp.V1693A (thyroid 2)\t2\t0.399857\t0.645631\tNC_000019.10:g.17205973T>C\tENST00000594824.5:p.Val1693Ala\tENST00000397274.6:p.Val1693Ala,ENST00000595618.5:p.Val1693Ala,*ENST00000594824.5:p.Val1693Ala\t1\t60f3521c-5521-11e7-8f50-0ac135e8bacf\tRegulation of RhoA activity\tENST00000397274.6:V1693A\t0.95254\t0.045\t\t\t\t\t\tENST00000594824.5:V1693A(0.025:0.98158),*ENST00000397274.6:V1693A(0.045:0.95254),ENST00000595618.5:V1693A(0.042:0.95749)\t\tCOSM438878\tthyroid(2)\t1\trs7248508\t0.522744\t0.526958747465\t0.685404424473\t0.728029336735\t0.352247807018\t0.776672496721\t0.453231381586\t0.406255640183\t0.486462728551\t0.596900776808\t190609063506732,203395363506733,206865653506734,208819603506735,224846277709335,224792027709336\t19060906,20339536,20686565,20881960,22484627,22479202\tLDL cholesterol(0.0152),HDL cholesterol(0.0279),Triglycerides(0.0141),Height(0.0104),Obesity with early age of onset (age >2)(0.0471),Adiponectin levels(0.0294)\t../MuPIT_Interactive?gm=chr19:17205973\t\t\t\t\t\t\t\t\t\t0.631589\t\t\t\t12243.8\t.\thom\t812\t406\t2.0\t\t\r\n-10\tVAR526_unknown\tchr19\t18856059\t+\tC\tT\tunknown\tUPF1\tMS\tENST00000599848.5\t+\tEAIDSPVSFLALHNQIR\tEAIDSPVSFLVLHNQIR\tA571V\tENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS)\t\t\t\t\t\t\tENST00000262803\tp.A560V (large_intestine 1)\t1\t0\t0\tNC_000019.10:g.18856059C>T\tENST00000599848.5:p.Ala571Val\tENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val\t0\t\t\tENST00000262803.9:A560V\t0.09372\t0.662\t\t\t\t\t\tENST00000599848.5:A571V(0.643:0.10292),*ENST00000262803.9:A560V(0.662:0.09372)\t\tCOSM3100527\tlarge_intestine(1)\t1\t\t0.0\t\t\t\t\t\t\t\t\t\t\t\t\t../MuPIT_Interactive?gm=chr19:18856059\t\t\t\t\t\t\t\t\t\t0\t\t\t\t10269.5\t.\thet\t592\t2379\t0.248844052123\t\t\r\n'