changeset 7:da74170c55c7 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
author iuc
date Tue, 10 Mar 2020 06:14:55 -0400
parents 840fb4850be3
children 77a1e60fd1de
files gemini_mafify.py gemini_query.xml test-data/gemini_query_as_maf_result.tabular
diffstat 3 files changed, 413 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_mafify.py	Tue Mar 10 06:14:55 2020 -0400
@@ -0,0 +1,270 @@
+import string
+import sys
+
+
+so_to_maf = {
+    'splice_acceptor_variant': 'Splice_Site',
+    'splice_donor_variant': 'Splice_Site',
+    'transcript_ablation': 'Splice_Site',
+    'exon_loss_variant': 'Splice_Site',
+    'stop_gained': 'Nonsense_Mutation',
+    'stop_lost': 'Nonstop_Mutation',
+    'frameshift_variant': 'Frame_Shift_',
+    'initiator_codon_variant': 'Translation_Start_Site',
+    'start_lost': 'Translation_Start_Site',
+    'inframe_insertion': 'In_Frame_Ins',
+    'inframe_deletion': 'In_Frame_Del',
+    'conservative_inframe_insertion': 'In_Frame_Ins',
+    'conservative_inframe_deletion': 'In_Frame_Del',
+    'disruptive_inframe_insertion': 'In_Frame_Ins',
+    'disruptive_inframe_deletion': 'In_Frame_Del',
+    'missense_variant': 'Missense_Mutation',
+    'coding_sequence_variant': 'Missense_Mutation',
+    'conservative_missense_variant': 'Missense_Mutation',
+    'rare_amino_acid_variant': 'Missense_Mutation',
+    'transcript_amplification': 'Intron',
+    'intron_variant': 'Intron',
+    'INTRAGENIC': 'Intron',
+    'intragenic_variant': 'Intron',
+    'splice_region_variant': 'Splice_Region',
+    'mature_miRNA_variant': 'RNA',
+    'exon_variant': 'RNA',
+    'non_coding_exon_variant': 'RNA',
+    'non_coding_transcript_exon_variant': 'RNA',
+    'non_coding_transcript_variant': 'RNA',
+    'nc_transcript_variant': 'RNA',
+    'stop_retained_variant': 'Silent',
+    'synonymous_variant': 'Silent',
+    'NMD_transcript_variant': 'Silent',
+    'incomplete_terminal_codon_variant': 'Silent',
+    '5_prime_UTR_variant': "5'UTR",
+    '5_prime_UTR_premature_start_codon_gain_variant': "5'UTR",
+    '3_prime_UTR_variant': "3'UTR",
+    'intergenic_variant': 'IGR',
+    'intergenic_region': 'IGR',
+    'regulatory_region_variant': 'IGR',
+    'regulatory_region': 'IGR',
+    'TF_binding_site_variant': 'IGR',
+    'upstream_gene_variant': "5'Flank",
+    'downstream_gene_variant': "3'Flank",
+}
+
+
+class VariantEffect():
+    def __init__(self, variant_type):
+        self.variant_type = variant_type.capitalize()
+        assert self.variant_type in ['Snp', 'Ins', 'Del']
+
+    def __getitem__(self, so_effect):
+        if so_effect not in so_to_maf or (
+            'frame' in so_effect and self.variant_type == 'Snp'
+        ):
+            return 'Targeted_Region'
+
+        ret = so_to_maf[so_effect]
+        if ret == 'Frame_Shift_':
+            ret += self.variant_type
+        return ret
+
+
+infile = sys.argv[1]
+if len(sys.argv) > 2:
+    tumor_sample_name = sys.argv[2]
+if len(sys.argv) > 3:
+    normal_sample_name = sys.argv[3]
+
+start_pos_idx = None
+ref_idx = None
+alt_idx = None
+variant_type_idx = None
+variant_classification_idx = None
+gt_alt_depths_idx = {}
+gt_ref_depths_idx = {}
+gts_idx = {}
+samples = set()
+required_fields = [
+    'Hugo_Symbol',
+    'NCBI_Build',
+    'Variant_Type',
+    'Variant_Classification',
+    'Tumor_Sample_Barcode',
+    'HGVSp_Short'
+]
+
+
+with open(infile) as data_in:
+    cols = data_in.readline().rstrip().split('\t')
+    for field in required_fields:
+        if field not in cols:
+            raise IndexError(
+                'Cannot generate valid MAF without the following input '
+                'columns: {0}.\n'
+                'Missing column: "{1}"'
+                .format(required_fields, field)
+            )
+    for i, col in enumerate(cols):
+        if col == 'Variant_Type':
+            variant_type_idx = i
+        elif col == 'Variant_Classification':
+            variant_classification_idx = i
+        elif col == 'Start_Position':
+            start_pos_idx = i
+        elif col == 'Reference_Allele':
+            ref_idx = i
+        elif col == 'alt':
+            alt_idx = i
+        else:
+            column, _, sample = col.partition('.')
+            if sample:
+                if column == 'gt_alt_depths':
+                    gt_alt_depths_idx[sample] = i
+                elif column == 'gt_ref_depths':
+                    gt_ref_depths_idx[sample] = i
+                elif column == 'gts':
+                    gts_idx[sample] = i
+                else:
+                    # not a recognized sample-specific column
+                    continue
+                samples.add(sample)
+
+    if ref_idx is None:
+        raise IndexError('Input file does not have a column "Reference_Allele".')
+
+    if not tumor_sample_name:
+        if normal_sample_name:
+            raise ValueError(
+                'Normal sample name requires the tumor sample name to be '
+                'specified, too.'
+            )
+        if len(samples) > 1:
+            raise ValueError(
+                'A tumor sample name is required with more than one sample '
+                'in the input.'
+            )
+        if samples:
+            # There is a single sample with genotype data.
+            # Assume its the tumor sample.
+            tumor_sample_name = next(iter(samples))
+    else:
+        if tumor_sample_name not in samples:
+            raise ValueError(
+                'Could not find information about the specified tumor sample '
+                'in the input.'
+            )
+        if tumor_sample_name == normal_sample_name:
+            raise ValueError(
+                'Need different names for the normal and the tumor sample.'
+            )
+
+    if normal_sample_name and normal_sample_name not in samples:
+        raise ValueError(
+            'Could not find information about the specified normal sample '
+            'in the input.'
+        )
+
+    # All input data checks passed!
+    # Now extract just the relevant index numbers for the tumor/normal pair
+    gts_idx = (
+        gts_idx.get(tumor_sample_name, alt_idx),
+        gts_idx.get(normal_sample_name)
+    )
+    gt_alt_depths_idx = (
+        gt_alt_depths_idx.get(tumor_sample_name),
+        gt_alt_depths_idx.get(normal_sample_name)
+    )
+    gt_ref_depths_idx = (
+        gt_ref_depths_idx.get(tumor_sample_name),
+        gt_ref_depths_idx.get(normal_sample_name)
+    )
+
+    # Echo all MAF column names
+    cols_to_print = []
+    for n in range(len(cols)):
+        if n in gts_idx:
+            continue
+        if n in gt_alt_depths_idx:
+            continue
+        if n in gt_ref_depths_idx:
+            continue
+        if n != alt_idx:
+            cols_to_print.append(n)
+
+    print('\t'.join([cols[n] for n in cols_to_print]))
+
+    for line in data_in:
+        cols = line.rstrip().split('\t')
+
+        gt_alt_depths = [
+            int(cols[ad_idx]) if ad_idx else ''
+            for ad_idx in gt_alt_depths_idx
+        ]
+        gt_ref_depths = [
+            int(cols[rd_idx]) if rd_idx else ''
+            for rd_idx in gt_ref_depths_idx
+        ]
+
+        gts = [
+            ['', ''],
+            ['', '']
+        ]
+        for n, gt_idx in enumerate(gts_idx):
+            if gt_idx:
+                gt_sep = '/' if '/' in cols[gt_idx] else '|'
+                allele1, _, allele2 = [
+                    '' if allele == '.' else allele
+                    for allele in cols[gt_idx].partition(gt_sep)
+                ]
+                # follow cBioportal recommendation to leave allele1 empty
+                # when information is not avaliable
+                if not allele2:
+                    gts[n] = [allele2, allele1]
+                else:
+                    gts[n] = [allele1, allele2]
+        if not gts:
+            gts = [['', ''], ['', '']]
+
+        if cols[variant_type_idx].lower() in ['ins', 'del']:
+            # transform VCF-style indel representations into MAF ones
+            ref_allele = cols[ref_idx]
+            for n, nucs in enumerate(
+                zip(
+                    ref_allele,
+                    *[allele for gt in gts for allele in gt if allele]
+                )
+            ):
+                if any(nuc != nucs[0] for nuc in nucs[1:]):
+                    break
+            else:
+                n += 1
+            if n > 0:
+                cols[ref_idx] = cols[ref_idx][n:] or '-'
+                for gt in gts:
+                    for idx, allele in enumerate(gt):
+                        if allele:
+                            gt[idx] = allele[n:] or '-'
+                if cols[ref_idx] == '-':
+                    n -= 1
+                cols[start_pos_idx] = str(int(cols[start_pos_idx]) + n)
+
+        # in-place substitution of so_effect with MAF effect
+        cols[variant_classification_idx] = VariantEffect(
+            cols[variant_type_idx]
+        )[cols[variant_classification_idx]]
+        ret_line = '\t'.join([cols[n] for n in cols_to_print])
+
+        field_formatters = {
+            'tumor_seq_allele1': gts[0][0],
+            'tumor_seq_allele2': gts[0][1],
+            'match_norm_seq_allele1': gts[1][0],
+            'match_norm_seq_allele2': gts[1][1],
+            't_alt_count': gt_alt_depths[0],
+            'n_alt_count': gt_alt_depths[1],
+            't_ref_count': gt_ref_depths[0],
+            'n_ref_count': gt_ref_depths[1],
+        }
+
+        print(
+            # use safe_substitute here to avoid key errors with column content
+            # looking like unknown placeholders
+            string.Template(ret_line).safe_substitute(field_formatters)
+        )
--- a/gemini_query.xml	Fri Jan 24 17:31:00 2020 -0500
+++ b/gemini_query.xml	Tue Mar 10 06:14:55 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@+galaxy1">
     <description>Querying the GEMINI database</description>
     <macros>
         <import>gemini_macros.xml</import>
@@ -27,6 +27,13 @@
             <param argument="--dgidb" name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False"
             label="Request drug-gene interaction info from DGIdb" help="" />
         </xml>
+        <xml name="maf_extra_info">
+            <param name="tumor_sample_name" type="text"
+            label="Name of the tumor sample in the (multi-sample) GEMINI database"
+            help="Specify only if the tumor sample is not the only sample in the database." />
+            <param name="normal_sample_name" type="text"
+            label="Name of the normal sample in the GEMINI database (for matched tumor/normal sample pair analyses only)" />
+        </xml>
     </macros>
     <expand macro="requirements" />
     <expand macro="stdio" />
@@ -69,7 +76,7 @@
                 #else:
                     affected
                 #end if
-            #else:
+            #elif str($query.oformat.report.format) != 'maf':
                 --format ${query.oformat.report.format}
             #end if
 
@@ -77,11 +84,36 @@
                 ## build the SQL query string from its components
                 #if str($query.oformat.report.format) in ('vcf', 'tped'):
                     #set $cols = "*"
+                #elif str($query.oformat.report.format) == 'maf':
+                    #if str($query.oformat.report.tumor_sample_name):
+                        #set $gt_string = 'gt_alt_depths.{0}, gt_ref_depths.{0}, gts.{0}'.format(str($query.oformat.report.tumor_sample_name))
+                        #if str($query.oformat.report.normal_sample_name):
+                            #set $gt_string = $gt_string + ', gt_alt_depths.{0}, gt_ref_depths.{0}, gts.{0}'.format(str($query.oformat.report.normal_sample_name))
+                        #end if
+                    #else:
+                        #set $gt_string = '(gt_alt_depths).(*), (gt_ref_depths).(*), (gts).(*)'
+                    #end if
+                    #if str($query.oformat.report.mutation_status.status_select) == 'custom':
+                        ## Need to quote the user-specified mutation status for the SQL query
+                        #set $mutation_status = '"%s"' % str($query.oformat.report.mutation_status.status_custom)
+                    #elif str($query.oformat.report.mutation_status.status_select) == 'expression':
+                        ## For custom expressions, it is up to the user to ensure valid syntax
+                        #set $mutation_status = str($query.oformat.report.mutation_status.status_expression)
+                    #else:
+                        ## The user selected a fixed value from the list, but
+                        ## it still needs quoting.
+                        #set $mutation_status = '"%s"' % str($query.oformat.report.mutation_status.status_select)
+                    #end if
+                    #set $cols = 'ifnull(g1.gene, "unknown") AS Hugo_Symbol, ifnull(ifnull(g2.entrez_id, g1.entrez_id), "") AS Entrez_Gene_Id, "" AS Center, "37" AS NCBI_Build, replace(v.chrom, "chr", "") AS Chromosome, v.start + 1 AS Start_Position, v.end AS End_Position, "" as Strand, v.impact_so AS Variant_Classification, ifnull(nullif(v.type, "indel"), v.sub_type) AS Variant_Type, v.ref AS Reference_Allele, "${tumor_seq_allele1}" AS Tumor_Seq_Allele1, "${tumor_seq_allele2}" AS Tumor_Seq_Allele2, ifnull(v.rs_ids, ifnull(nullif(ifnull(nullif(v.in_omim = 0 AND v.cosmic_ids IS NULL AND v.max_aaf_all = -1, 1), "novel"), 0), "")) AS dbSNP_RS, "" AS dbSNP_Val_Status, printf("%s", "' + str($query.oformat.report.tumor_sample_id) + '") AS Tumor_Sample_Barcode, printf("%s", "' + str($query.oformat.report.norm_sample_id) + '") AS Matched_Norm_Sample_Barcode, "${match_norm_seq_allele1}" AS Match_Norm_Seq_Allele1, "${match_norm_seq_allele2}" AS Match_Norm_Seq_Allele2, "" AS Tumor_Validation_Allele1, "" AS Tumor_Validation_Allele2, "" AS Match_Norm_Validation_Allele1, "" AS Match_Norm_Validation_Allele2, "" AS Verification_Status, "" AS Validation_Status, ' + $mutation_status + ' AS Mutation_Status, "" AS Sequencing_Phase, "" AS Sequence_Source, "" AS Validation_Method, "" AS Score, "" AS BAM_File, "" AS Sequencer, ifnull(nullif(v.aa_change, ""), "p.=") AS HGVSp_Short, "${t_alt_count}" AS t_alt_count, "${t_ref_count}" AS t_ref_count, "${n_alt_count}" AS n_alt_count, "${n_ref_count}" AS n_ref_count, v.alt, ' + $gt_string
                 #else:
                     #set $report = $query.oformat.report.report
                     @SET_COLS@
                 #end if
                 #set $q = "SELECT %s FROM variants" % $cols
+                #if str($query.oformat.report.format) == 'maf':
+                    #set $q = $q + ' v LEFT JOIN (SELECT DISTINCT gene, is_hgnc, hgnc_id, entrez_id, chrom FROM gene_detailed) g1 ON v.gene = g1.gene AND v.chrom = g1.chrom LEFT JOIN (SELECT DISTINCT gene, is_hgnc, hgnc_id, entrez_id, transcript, chrom, ensembl_gene_id FROM gene_detailed) g2 ON g1.gene = g2.gene AND (v.transcript = g2.transcript OR v.transcript=g2.ensembl_gene_id)'
+                #end if
+
                 #set $where_clause_elements = []
                 #if str($query.filter).strip():
                     #silent $where_clause_elements.append(str($query.filter).strip())
@@ -95,6 +127,9 @@
                 #if $where_clause_elements:
                     #set $q = $q + " WHERE " + " AND ".join($where_clause_elements)
                 #end if
+                #if str($query.oformat.report.format) == 'maf':
+                    #set $q = $q + " GROUP BY v.variant_id"
+                #end if
                 #if str($query.oformat.report.order_by).strip():
                     #set $q = $q + " ORDER BY " + str($query.oformat.report.order_by).strip() + str($query.oformat.report.sort_order)
                 #end if
@@ -108,6 +143,9 @@
             @MULTILN_SQL_EXPR_TO_CMDLN@
 
             '$infile'
+            #if str($query.oformat.report.format) == 'maf':
+                > temp.txt && python '$__tool_directory__/gemini_mafify.py' temp.txt '${query.oformat.report.tumor_sample_name}' '${query.oformat.report.normal_sample_name}'
+            #end if
             > '$outfile'
 ]]>
     </command>
@@ -136,6 +174,7 @@
                             <option value="carrier_summary">tabular with carrier summary</option>
                             <option value="vcf">VCF (simplified)</option>
                             <option value="json">JSON</option>
+                            <option value="maf">MAF (cBioportal-compatible)</option>
                             <option value="tped">TPED</option>
                         </param>
                         <when value="default">
@@ -194,6 +233,50 @@
                             <param name="dgidb" type="hidden" value="" />
                             <expand macro="sorting" />
                         </when>
+                        <when value="maf">
+                            <param name="header" type="hidden" value="--header" />
+                            <param name="dgidb" type="hidden" value="" />
+                            <expand macro="maf_extra_info" />
+                            <param name="tumor_sample_id" type="text"
+                            label="Tumor sample ID">
+                            <validator type="expression" message="A tumor sample identifier is required">value.strip()</validator>
+                            </param>
+                            <param name="norm_sample_id" type="text"
+                            label="Normal sample ID (for matched tumor/normal sample pair analyses only)" />
+                            <conditional name="mutation_status">
+                                <param name="status_select" type="select"
+                                label="Mutation status to be recorded for variants reported with this query"
+                                help="'Somatic', 'Germline', 'LOH', 'Wildtype' and 'None' are fixed values that are treated explicitly by cBioportal. In particular, 'Somatic' and 'Germline' are supported by the cBioportal user interface in the Mutations tab, while 'LOH', 'Wildtype' and 'None' will not be loaded. Any other values will cause the variants to be loaded into cBioPortal and will be displayed as text in the Mutations tab.">
+                                    <option value="expression">Calculate per variant</option>
+                                    <option value="custom">Other fixed value</option>
+                                    <option value="Somatic" selected="true">Somatic</option>
+                                    <option value="Germline">Germline</option>
+                                    <option value="LOH">LOH</option>
+                                    <option value="Wildtype">Wildtype</option>
+                                    <option value="None">None</option>
+                                </param>
+                                <when value="Somatic" />
+                                <when value="Germline" />
+                                <when value="LOH" />
+                                <when value="Wildtype" />
+                                <when value="None" />
+                                <when value="custom">
+                                    <param name="status_custom" type="text"
+                                    label="Mutation status (custom value)">
+                                        <validator type="expression" message="Need a value for Mutation status">value.strip()</validator>
+                                    </param>
+                                </when>
+                                <when value="expression">
+                                    <param name="status_expression" type="text"
+                                    label="SQL expression used to compute per-variant status"
+                                    help="Enter a valid SQL result column expression to compute the mutation status from columns of the variants table in the GEMINI database. As one example, the expression ifnull(nullif(ifnull(nullif(ifnull(nullif(somatic_status, 3), 'LOH'), 2), 'Somatic'), 1), 'Germline') assumes that you have a column somatic_status added to the variants table of your database, and will record 'Germline', 'Somatic', or 'LOH' for variants with a value of 1, 2, or 3 in that column, respectively.">
+                                        <expand macro="sanitize_query" />
+                                        <validator type="expression" message="Mutation status expression cannot be empty">value.strip()</validator>
+                                    </param>
+                                </when>
+                            </conditional>
+                            <expand macro="sorting" />
+                        </when>
                         <when value="tped">
                             <param name="header" type="hidden" value="" />
                             <param name="dgidb" type="hidden" value="" />
@@ -222,6 +305,7 @@
                             <option value="carrier_summary">tabular with carrier summary</option>
                             <option value="vcf">VCF (simplified)</option>
                             <option value="json">JSON</option>
+                            <option value="maf">MAF (cBioportal-compatible)</option>
                             <option value="tped">TPED</option>
                         </param>
                         <when value="default">
@@ -255,6 +339,11 @@
                             <param name="header" type="hidden" value="" />
                             <param name="dgidb" type="hidden" value="" />
                         </when>
+                        <when value="maf">
+                            <param name="header" type="hidden" value="--header" />
+                            <param name="dgidb" type="hidden" value="" />
+                            <expand macro="maf_extra_info" />
+                        </when>
                         <when value="tped">
                             <param name="header" type="hidden" value="" />
                             <param name="dgidb" type="hidden" value="" />
@@ -285,6 +374,19 @@
                 </assert_contents>
             </output>
         </test>
+        <test>
+            <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" />
+            <conditional name="query">
+                <param name="interface" value="basic" />
+            </conditional>
+            <section name="oformat">
+                <conditional name="report">
+                    <param name="format" value="maf" />
+                    <param name="tumor_sample_id" value="test" />
+                </conditional>
+            </section>
+            <output name="outfile" file="gemini_query_as_maf_result.tabular" />
+        </test>
     </tests>
     <help>
 <![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_query_as_maf_result.tabular	Tue Mar 10 06:14:55 2020 -0400
@@ -0,0 +1,39 @@
+Hugo_Symbol	Entrez_Gene_Id	Center	NCBI_Build	Chromosome	Start_Position	End_Position	Strand	Variant_Classification	Variant_Type	Reference_Allele	Tumor_Seq_Allele1	Tumor_Seq_Allele2	dbSNP_RS	dbSNP_Val_Status	Tumor_Sample_Barcode	Matched_Norm_Sample_Barcode	Match_Norm_Seq_Allele1	Match_Norm_Seq_Allele2	Tumor_Validation_Allele1	Tumor_Validation_Allele2	Match_Norm_Validation_Allele1	Match_Norm_Validation_Allele2	Verification_Status	Validation_Status	Mutation_Status	Sequencing_Phase	Sequence_Source	Validation_Method	Score	BAM_File	Sequencer	HGVSp_Short	t_alt_count	t_ref_count	n_alt_count	n_ref_count
+unknown			37	1	10583	10583		5'Flank	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	10611	10611		5'Flank	snp	C		G	novel		test										Somatic							p.=				
+unknown			37	1	13302	13302		RNA	snp	C		T	novel		test										Somatic							p.=				
+unknown			37	1	13327	13327		RNA	snp	G		C	novel		test										Somatic							p.=				
+unknown			37	1	13958	13958		RNA	del	C		-	novel		test										Somatic							p.=				
+unknown			37	1	13980	13980		RNA	snp	T		C	novel		test										Somatic							p.=				
+unknown			37	1	30923	30923		3'Flank	snp	G		T	novel		test										Somatic							p.=				
+unknown			37	1	46402	46402		IGR	ins	-		TGT	novel		test										Somatic							p.=				
+unknown			37	1	47190	47190		IGR	ins	-		A	novel		test										Somatic							p.=				
+unknown			37	1	51476	51476		IGR	snp	T		C	novel		test										Somatic							p.=				
+unknown			37	1	51479	51479		IGR	snp	T		A	novel		test										Somatic							p.=				
+unknown			37	1	51914	51914		IGR	snp	T		G	novel		test										Somatic							p.=				
+unknown			37	1	51935	51935		IGR	snp	C		T	novel		test										Somatic							p.=				
+unknown			37	1	51954	51954		IGR	snp	G		C	novel		test										Somatic							p.=				
+unknown			37	1	52058	52058		IGR	snp	G		C	novel		test										Somatic							p.=				
+unknown			37	1	52144	52144		IGR	snp	T		A	novel		test										Somatic							p.=				
+unknown			37	1	52186	52188		IGR	del	TAA		-	novel		test										Somatic							p.=				
+unknown			37	1	52238	52238		IGR	snp	T		G	novel		test										Somatic							p.=				
+unknown			37	1	53235	53236		IGR	del	AT		-	novel		test										Somatic							p.=				
+unknown			37	1	54353	54353		IGR	snp	C		A	novel		test										Somatic							p.=				
+unknown			37	1	54421	54421		IGR	snp	A		G	novel		test										Somatic							p.=				
+unknown			37	1	54490	54490		IGR	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	54676	54676		IGR	snp	C		T	novel		test										Somatic							p.=				
+unknown			37	1	54753	54753		IGR	snp	T		G	novel		test										Somatic							p.=				
+unknown			37	1	55164	55164		IGR	snp	C		A	novel		test										Somatic							p.=				
+unknown			37	1	55249	55249		IGR	ins	-		TATGG	novel		test										Somatic							p.=				
+unknown			37	1	55299	55299		IGR	snp	C		T	novel		test										Somatic							p.=				
+unknown			37	1	55313	55313		IGR	snp	A		T	novel		test										Somatic							p.=				
+unknown			37	1	55326	55326		IGR	snp	T		C	novel		test										Somatic							p.=				
+unknown			37	1	55330	55330		IGR	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	55367	55367		IGR	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	55388	55388		IGR	snp	C		T	novel		test										Somatic							p.=				
+unknown			37	1	55394	55394		IGR	snp	T		A	novel		test										Somatic							p.=				
+unknown			37	1	55416	55416		IGR	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	55427	55427		IGR	snp	T		C	novel		test										Somatic							p.=				
+unknown			37	1	55816	55816		IGR	snp	G		A	novel		test										Somatic							p.=				
+unknown			37	1	55850	55850		IGR	snp	C		G	novel		test										Somatic							p.=				
+unknown			37	1	55852	55852		IGR	snp	G		C	novel		test										Somatic							p.=