annotate gemini_mafify.py @ 8:db47f4939381 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit f7bdf08922aaf4119aefe7041e754a69cf64aebd
author iuc
date Wed, 13 Jul 2022 15:35:54 +0000
parents 142d95ab942e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
1 import string
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
2 import sys
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
3
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
4
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
# Lookup table from effect terms (keys) to the classification string emitted
# for them (values).  Entries are grouped by output class for readability;
# the key order of the original table is retained.
# NOTE: 'Frame_Shift_' is deliberately an incomplete prefix -- VariantEffect
# appends the variant type ('Ins' or 'Del') to it.
so_to_maf = {
    # --- Splice_Site ---
    'splice_acceptor_variant': 'Splice_Site',
    'splice_donor_variant': 'Splice_Site',
    'transcript_ablation': 'Splice_Site',
    'exon_loss_variant': 'Splice_Site',
    # --- stop codon gained / lost ---
    'stop_gained': 'Nonsense_Mutation',
    'stop_lost': 'Nonstop_Mutation',
    # --- frameshift (prefix only, completed at lookup time) ---
    'frameshift_variant': 'Frame_Shift_',
    # --- Translation_Start_Site ---
    'initiator_codon_variant': 'Translation_Start_Site',
    'start_lost': 'Translation_Start_Site',
    # --- in-frame insertions / deletions ---
    'inframe_insertion': 'In_Frame_Ins',
    'inframe_deletion': 'In_Frame_Del',
    'conservative_inframe_insertion': 'In_Frame_Ins',
    'conservative_inframe_deletion': 'In_Frame_Del',
    'disruptive_inframe_insertion': 'In_Frame_Ins',
    'disruptive_inframe_deletion': 'In_Frame_Del',
    # --- Missense_Mutation ---
    'missense_variant': 'Missense_Mutation',
    'coding_sequence_variant': 'Missense_Mutation',
    'conservative_missense_variant': 'Missense_Mutation',
    'rare_amino_acid_variant': 'Missense_Mutation',
    # --- Intron ---
    'transcript_amplification': 'Intron',
    'intron_variant': 'Intron',
    'INTRAGENIC': 'Intron',
    'intragenic_variant': 'Intron',
    # --- Splice_Region ---
    'splice_region_variant': 'Splice_Region',
    # --- RNA ---
    'mature_miRNA_variant': 'RNA',
    'exon_variant': 'RNA',
    'non_coding_exon_variant': 'RNA',
    'non_coding_transcript_exon_variant': 'RNA',
    'non_coding_transcript_variant': 'RNA',
    'nc_transcript_variant': 'RNA',
    # --- Silent ---
    'stop_retained_variant': 'Silent',
    'synonymous_variant': 'Silent',
    'NMD_transcript_variant': 'Silent',
    'incomplete_terminal_codon_variant': 'Silent',
    # --- UTRs ---
    '5_prime_UTR_variant': "5'UTR",
    '5_prime_UTR_premature_start_codon_gain_variant': "5'UTR",
    '3_prime_UTR_variant': "3'UTR",
    # --- IGR ---
    'intergenic_variant': 'IGR',
    'intergenic_region': 'IGR',
    'regulatory_region_variant': 'IGR',
    'regulatory_region': 'IGR',
    'TF_binding_site_variant': 'IGR',
    # --- flanking regions ---
    'upstream_gene_variant': "5'Flank",
    'downstream_gene_variant': "3'Flank",
}
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
51
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
52
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
class _InvalidVariantTypeError(ValueError, AssertionError):
    """Raised by VariantEffect for an unsupported variant type.

    Subclasses AssertionError as well as ValueError so that code which
    caught the failure of the former ``assert`` statement keeps working.
    """


class VariantEffect():
    """Translate effect terms into classification strings for one variant.

    An instance is bound to a single variant type and is indexed with an
    effect term (a key of the module-level ``so_to_maf`` table)::

        VariantEffect('del')['frameshift_variant']  # -> 'Frame_Shift_Del'
    """

    # Accepted variant types, in the capitalized form stored on the instance.
    _VALID_TYPES = ('Snp', 'Ins', 'Del')

    def __init__(self, variant_type):
        """Store the capitalized variant type.

        Args:
            variant_type: 'snp', 'ins' or 'del' in any letter case.

        Raises:
            ValueError: if the capitalized value is not one of
                'Snp', 'Ins', 'Del'.  The exception is also an
                AssertionError for backward compatibility.
        """
        self.variant_type = variant_type.capitalize()
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let invalid types through.
        if self.variant_type not in self._VALID_TYPES:
            raise _InvalidVariantTypeError(
                'Unsupported variant type "{0}". Expected one of: {1} '
                '(case-insensitive).'
                .format(variant_type, ', '.join(self._VALID_TYPES))
            )

    def __getitem__(self, so_effect):
        """Return the classification string for the effect term *so_effect*.

        Terms missing from ``so_to_maf`` yield 'Targeted_Region'.  So does
        any term containing 'frame' when the variant type is 'Snp'.
        """
        maf_class = so_to_maf.get(so_effect)
        if maf_class is None or (
            'frame' in so_effect and self.variant_type == 'Snp'
        ):
            return 'Targeted_Region'

        if maf_class == 'Frame_Shift_':
            # The table stores only the prefix; complete it with the
            # variant type ('Ins' or 'Del' -- 'Snp' was handled above).
            maf_class += self.variant_type
        return maf_class
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
68
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
69
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
# Command line: <infile> [tumor_sample_name [normal_sample_name]]
infile = sys.argv[1]
# The two sample names are optional.  They must still be bound when omitted,
# because later code tests them for truthiness; leaving them unassigned
# raised NameError when the script was run with only the input file.
tumor_sample_name = sys.argv[2] if len(sys.argv) > 2 else None
normal_sample_name = sys.argv[3] if len(sys.argv) > 3 else None
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
75
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
# Column positions of the general columns looked up in the input header;
# None means "not found (yet)".
start_pos_idx = ref_idx = alt_idx = None
variant_type_idx = variant_classification_idx = None

# Per-sample column positions, keyed by sample name.
gt_alt_depths_idx, gt_ref_depths_idx, gts_idx = {}, {}, {}

# Names of all samples that have at least one recognized per-sample column.
samples = set()

# Columns that must be present in the input header.
required_fields = [
    'Hugo_Symbol', 'NCBI_Build',
    'Variant_Type', 'Variant_Classification',
    'Tumor_Sample_Barcode', 'HGVSp_Short',
]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
93
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
94
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
95 with open(infile) as data_in:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
96 cols = data_in.readline().rstrip().split('\t')
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
97 for field in required_fields:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
98 if field not in cols:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
99 raise IndexError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
100 'Cannot generate valid MAF without the following input '
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
101 'columns: {0}.\n'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
102 'Missing column: "{1}"'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
103 .format(required_fields, field)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
104 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
105 for i, col in enumerate(cols):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
106 if col == 'Variant_Type':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
107 variant_type_idx = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
108 elif col == 'Variant_Classification':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
109 variant_classification_idx = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
110 elif col == 'Start_Position':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
111 start_pos_idx = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
112 elif col == 'Reference_Allele':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
113 ref_idx = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
114 elif col == 'alt':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
115 alt_idx = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
116 else:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
117 column, _, sample = col.partition('.')
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
118 if sample:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
119 if column == 'gt_alt_depths':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
120 gt_alt_depths_idx[sample] = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
121 elif column == 'gt_ref_depths':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
122 gt_ref_depths_idx[sample] = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
123 elif column == 'gts':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
124 gts_idx[sample] = i
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
125 else:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
126 # not a recognized sample-specific column
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
127 continue
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
128 samples.add(sample)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
129
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
130 if ref_idx is None:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
131 raise IndexError('Input file does not have a column "Reference_Allele".')
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
132
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
133 if not tumor_sample_name:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
134 if normal_sample_name:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
135 raise ValueError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
136 'Normal sample name requires the tumor sample name to be '
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
137 'specified, too.'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
138 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
139 if len(samples) > 1:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
140 raise ValueError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
141 'A tumor sample name is required with more than one sample '
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
142 'in the input.'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
143 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
144 if samples:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
145 # There is a single sample with genotype data.
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
146 # Assume it's the tumor sample.
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
147 tumor_sample_name = next(iter(samples))
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
148 else:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
149 if tumor_sample_name not in samples:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
150 raise ValueError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
151 'Could not find information about the specified tumor sample '
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
152 'in the input.'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
153 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
154 if tumor_sample_name == normal_sample_name:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
155 raise ValueError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
156 'Need different names for the normal and the tumor sample.'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
157 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
158
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
159 if normal_sample_name and normal_sample_name not in samples:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
160 raise ValueError(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
161 'Could not find information about the specified normal sample '
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
162 'in the input.'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
163 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
164
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
165 # All input data checks passed!
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
166 # Now extract just the relevant index numbers for the tumor/normal pair
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
167 gts_idx = (
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
168 gts_idx.get(tumor_sample_name, alt_idx),
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
169 gts_idx.get(normal_sample_name)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
170 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
171 gt_alt_depths_idx = (
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
172 gt_alt_depths_idx.get(tumor_sample_name),
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
173 gt_alt_depths_idx.get(normal_sample_name)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
174 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
175 gt_ref_depths_idx = (
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
176 gt_ref_depths_idx.get(tumor_sample_name),
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
177 gt_ref_depths_idx.get(normal_sample_name)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
178 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
179
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
180 # Echo all MAF column names
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
181 cols_to_print = []
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
182 for n in range(len(cols)):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
183 if n in gts_idx:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
184 continue
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
185 if n in gt_alt_depths_idx:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
186 continue
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
187 if n in gt_ref_depths_idx:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
188 continue
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
189 if n != alt_idx:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
190 cols_to_print.append(n)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
191
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
192 print('\t'.join([cols[n] for n in cols_to_print]))
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
193
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
194 for line in data_in:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
195 cols = line.rstrip().split('\t')
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
196
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
197 gt_alt_depths = [
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
198 int(cols[ad_idx]) if ad_idx else ''
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
199 for ad_idx in gt_alt_depths_idx
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
200 ]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
201 gt_ref_depths = [
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
202 int(cols[rd_idx]) if rd_idx else ''
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
203 for rd_idx in gt_ref_depths_idx
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
204 ]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
205
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
206 gts = [
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
207 ['', ''],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
208 ['', '']
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
209 ]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
210 for n, gt_idx in enumerate(gts_idx):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
211 if gt_idx:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
212 gt_sep = '/' if '/' in cols[gt_idx] else '|'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
213 allele1, _, allele2 = [
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
214 '' if allele == '.' else allele
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
215 for allele in cols[gt_idx].partition(gt_sep)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
216 ]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
217 # follow cBioportal recommendation to leave allele1 empty
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
218 # when information is not available
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
219 if not allele2:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
220 gts[n] = [allele2, allele1]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
221 else:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
222 gts[n] = [allele1, allele2]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
223 if not gts:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
224 gts = [['', ''], ['', '']]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
225
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
226 if cols[variant_type_idx].lower() in ['ins', 'del']:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
227 # transform VCF-style indel representations into MAF ones
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
228 ref_allele = cols[ref_idx]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
229 for n, nucs in enumerate(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
230 zip(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
231 ref_allele,
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
232 *[allele for gt in gts for allele in gt if allele]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
233 )
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
234 ):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
235 if any(nuc != nucs[0] for nuc in nucs[1:]):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
236 break
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
237 else:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
238 n += 1
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
239 if n > 0:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
240 cols[ref_idx] = cols[ref_idx][n:] or '-'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
241 for gt in gts:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
242 for idx, allele in enumerate(gt):
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
243 if allele:
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
244 gt[idx] = allele[n:] or '-'
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
245 if cols[ref_idx] == '-':
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
246 n -= 1
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
247 cols[start_pos_idx] = str(int(cols[start_pos_idx]) + n)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
248
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
249 # in-place substitution of so_effect with MAF effect
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
250 cols[variant_classification_idx] = VariantEffect(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
251 cols[variant_type_idx]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
252 )[cols[variant_classification_idx]]
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
253 ret_line = '\t'.join([cols[n] for n in cols_to_print])
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
254
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
255 field_formatters = {
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
256 'tumor_seq_allele1': gts[0][0],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
257 'tumor_seq_allele2': gts[0][1],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
258 'match_norm_seq_allele1': gts[1][0],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
259 'match_norm_seq_allele2': gts[1][1],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
260 't_alt_count': gt_alt_depths[0],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
261 'n_alt_count': gt_alt_depths[1],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
262 't_ref_count': gt_ref_depths[0],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
263 'n_ref_count': gt_ref_depths[1],
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
264 }
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
265
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
266 print(
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
267 # use safe_substitute here to avoid key errors with column content
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
268 # looking like unknown placeholders
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
269 string.Template(ret_line).safe_substitute(field_formatters)
142d95ab942e "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 5ea789e5342c3ad1afd2e0068c88f2b6dc4f7246"
iuc
parents:
diff changeset
270 )