Mercurial > repos > galaxyp > cravatool
annotate cravatp_submit.py @ 2:f3027b8f28bd draft
Deleted selected files
author | galaxyp |
---|---|
date | Thu, 16 Aug 2018 12:28:29 -0400 |
parents | 2c7bcc1219fc |
children | a018c44dc18b |
rev | line source |
---|---|
1
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
2 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
3 # Author: Ray W. Sajulga |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
4 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
5 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
6 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
7 import requests # pipenv requests |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
8 import json |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
9 import time |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
10 import urllib |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
11 import sys |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
12 import csv |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
13 import re |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
14 import math |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
15 import argparse |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
16 from xml.etree import ElementTree as ET |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
17 from zipfile import ZipFile |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
18 try: #Python 3 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
19 from urllib.request import urlopen |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
20 except ImportError: #Python 2 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
21 from urllib2 import urlopen |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
22 from io import BytesIO |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
23 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# Defaults for the optional settings; the argument-handling code below
# overrides each one only when the matching command-line value is supplied.
chasm_classifier = ''
intersected_only = False
probed_filename = vcf_output = analysis_type = None
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
30 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
31 # # Testing Command |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
32 # python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
33 # test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
34 # test-data/error.tsv CHASM --classifier Breast --proBED |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
35 # test-data/MCF7_proBed.bed |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# Command-line interface: seven positional arguments followed by four
# optional flags, registered in the order they must appear on the command line.
parser = argparse.ArgumentParser()
for option, description in (
        ('cravatInput',
         'The filename of the input CRAVAT-formatted tabular file '
         '(e.g., VCF)'),
        ('GRCh',
         'The name of the human reference genome used for annotation: '
         'GRCh38/hg38 or GRCh37/hg19'),
        ('variant', 'The filename of the output variant file'),
        ('gene', 'The filename of the output gene variant report'),
        ('noncoding',
         'The filename of the output non-coding variant report'),
        ('error', 'The filename of the output error file'),
        ('analysis',
         'The machine-learning algorithm used for CRAVAT annotation '
         '(VEST and/or CHASM)'),
        ('--classifier', 'The cancer classifier for the CHASM algorithm'),
        ('--proBED',
         'The filename of the proBED file containing peptides with '
         'genomic coordinates'),
        ('--intersectOnly',
         'Specifies whether to analyze only variants intersected between '
         'the CRAVAT input and proBED file'),
        ('--vcfOutput', 'The output filename of the intersected VCF file'),
):
    parser.add_argument(option, help=description)
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
66 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# Parse the command line and copy each value into the module-level name the
# rest of the script reads.
args = parser.parse_args()
input_filename, GRCh_build = args.cravatInput, args.GRCh
output_filename = args.variant
file_3, file_4, file_5 = args.gene, args.noncoding, args.error

# The literal string 'None' means "no analysis requested"; the default stays.
if args.analysis != 'None':
    analysis_type = args.analysis

# Optional flags replace their defaults only when a non-empty value was given.
if args.classifier:
    chasm_classifier = args.classifier
if args.proBED:
    probed_filename = args.proBED
if args.intersectOnly:
    intersected_only = args.intersectOnly
if args.vcfOutput:
    vcf_output = args.vcfOutput

# Any analysis value containing '+' is sent as the combined 'CHASM;VEST'.
if analysis_type and '+' in analysis_type:
    analysis_type = 'CHASM;VEST'
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
88 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
89 # obtains the transcript's protein sequence using Ensembl API |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
def getSequence(transcript_id, timeout=60):
    """Fetch a transcript's protein sequence from the Ensembl REST API.

    transcript_id -- identifier appended to the ``/sequence/id/`` endpoint.
    timeout       -- seconds to wait for Ensembl before giving up; without a
                     timeout a stalled connection would block forever.

    Returns the text of the first ``AAseq`` element in the SeqXML response,
    or None when the request times out, the server answers with an error
    status, or the response contains no ``AAseq`` element.
    """
    server = 'http://rest.ensembl.org'
    ext = ('/sequence/id/' + transcript_id
           + '?content-type=text/x-seqxml%2Bxml;'
           'multiple_sequences=1;type=protein')
    try:
        req = requests.get(server + ext,
                           headers={"Content-Type": "text/plain"},
                           timeout=timeout)
    except requests.exceptions.Timeout:
        # Treat an unresponsive server like any other failed lookup.
        return None

    if not req.ok:
        return None

    root = ET.fromstring(req.content)
    # Only the first amino-acid sequence in the document is used.
    first_match = next(root.iter('AAseq'), None)
    if first_match is None:
        return None
    return first_match.text
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
104 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
105 # parses the proBED file as a list. |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
def loadProBED(filename=None):
    """Parse a tab-delimited proBED file into a list of rows.

    filename -- path of the proBED file to read; when omitted, the
                module-level ``probed_filename`` is used.

    Returns a list with one entry per line of the file, each entry being the
    list of that line's tab-separated column strings.
    """
    if filename is None:
        filename = probed_filename
    with open(filename) as tsvin:
        return list(csv.reader(tsvin, delimiter='\t'))
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
113 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
write_header = True


# Creates a VCF file that only contains variants that overlap with the
# proteogenomic input (proBED) file if the user specifies that they want
# to only include intersected variants or if they want to receive the
# intersected VCF as well.
if probed_filename and (vcf_output or intersected_only == 'true'):
    proBED = loadProBED()
    if not vcf_output:
        vcf_output = 'intersected_input.vcf'

    # csv.writer needs a text-mode handle on Python 3 (opened with
    # newline='' so the writer controls line endings) but a binary-mode
    # handle on Python 2.
    if sys.version_info[0] >= 3:
        vcf_handle = open(vcf_output, 'w', newline='')
    else:
        vcf_handle = open(vcf_output, 'wb')

    with vcf_handle, open(input_filename) as tsvin:
        tsvreader = csv.reader(tsvin, delimiter='\t')
        tsvout = csv.writer(vcf_handle, delimiter='\t', escapechar=' ',
                            quoting=csv.QUOTE_NONE)

        for row in tsvreader:
            # Blank lines and '#' header/meta lines are copied through as-is.
            if not row or row[0].startswith('#'):
                tsvout.writerow(row)
                continue

            genchrom = row[0]
            genpos = int(row[1])

            # Keep the variant if it falls inside any peptide's span
            # (columns 2 and 3 of the proBED row, both ends inclusive) on
            # the same chromosome. Chromosomes are compared first so
            # coordinates are parsed only for peptides on that chromosome.
            for peptide in proBED:
                if (genchrom == peptide[0]
                        and int(peptide[1]) <= genpos <= int(peptide[2])):
                    tsvout.writerow(row)
                    break

    # When only intersected variants are wanted, the filtered VCF replaces
    # the original input for the CRAVAT submission.
    if intersected_only == 'true':
        input_filename = vcf_output
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
148 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# sets up the parameters for submission to the CRAVAT API
# The 'hg19' flag is switched on for every spelling of the older build that
# the command-line help advertises (GRCh37 / hg19), compared
# case-insensitively; any other value leaves it off.
uses_hg19 = GRCh_build.strip().lower() in ('grch37', 'hg19', 'grch37/hg19')
parameters = {
    'email': 'rsajulga@umn.edu',
    'hg19': 'on' if uses_hg19 else 'off',
    'functionalannotation': 'on',
    'tsvreport': 'on',
    'mupitinput': 'on',
}
# The analysis list and CHASM classifier are sent only when they were set.
if analysis_type:
    parameters['analyses'] = analysis_type
if chasm_classifier:
    parameters['chasmclassifier'] = chasm_classifier
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
159 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# Submits the input file and parameters to the CRAVAT web service.
# The upload is opened in binary mode (recommended by Requests for
# multipart uploads) and closed as soon as the POST has completed.
with open(input_filename, 'rb') as upload:
    submit = requests.post('http://www.cravat.us/CRAVAT/rest/service/submit',
                           files={'inputfile': upload},
                           data=parameters)

# The response is JSON; only the job ID is needed from it.
jobid = json.loads(submit.text)['jobid']

# Polls the status endpoint every two seconds until the job reports
# 'Success'.
# NOTE(review): no other status ends the loop, so a job that fails on the
# server side would be polled forever -- confirm the failure status values
# against the CRAVAT API and add an exit path.
while True:
    check = requests.get(
        'http://www.cravat.us/CRAVAT/rest/service/status',
        params={'jobid': jobid})
    job_state = json.loads(check.text)
    status = job_state['status']
    resultfileurl = job_state['resultfileurl']
    if status == 'Success':
        break
    time.sleep(2)
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
181 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# NOTE(review): this section was recovered from an annotate view that drops
# leading whitespace; it is assumed to be top-level script code -- confirm
# against the repository copy.

# Download the zip archive created by CRAVAT a single time and keep it in
# memory; every result table below is read from that one copy.
r = requests.get(resultfileurl)
# Stop on an HTTP error status instead of trying to parse the error body as
# a zip archive.
r.raise_for_status()

with ZipFile(BytesIO(r.content)) as zipfile:
    # The Variant and Variant Additional Details (VAD) tables are kept as
    # lists of raw byte lines; the code further down decodes and splits them.
    with zipfile.open(jobid + '/Variant.Result.tsv') as result_member:
        variants = result_member.readlines()
    with zipfile.open(jobid + '/Variant_Additional_Details.Result.tsv') as result_member:
        vad = result_member.readlines()

    # Copy the gene, noncoding, and error tables byte-for-byte to their
    # output paths, closing each destination file once it is written.
    for dest_path, member_suffix in (
            (file_3, '/Gene_Level_Analysis.Result.tsv'),
            (file_4, '/Variant_Non-coding.Result.tsv'),
            (file_5, '/Input_Errors.Result.tsv')):
        with open(dest_path, 'wb') as dest:
            dest.write(zipfile.read(jobid + member_suffix))
194 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
195 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
196 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# NOTE(review): this section was recovered from an annotate view that drops
# leading whitespace, so the nesting below is reconstructed from the control
# flow -- confirm against the repository copy.

# The proBED peptides are loaded here only when vcf_output is falsy.
# NOTE(review): presumably proBED is already populated earlier in the file on
# the vcf_output path -- confirm.
if probed_filename and not vcf_output:
    proBED = loadProBED()

if probed_filename:
    n = 11  # Index for proteogenomic column start
    # Matches a protein sequence change such as "A123T"; the three groups are
    # used later as reference residue(s), position and alternate residue(s).
    reg_seq_change = re.compile(r'([A-Z]+)(\d+)([A-Z]+)')
    # Matches one "<transcript id>:<protein sequence change>" entry.
    SOtranscripts = re.compile(r'([A-Z]+[\d\.]+):([A-Z]+\d+[A-Z]+)')
    # peptide sequence -> {protein sequence change: [offsets on the peptide]}
    pep_muts = {}
    # CRAVAT 'Input line' value -> [peptide sequences the variant falls in]
    pep_map = {}

    # Initially screens through the Variant Additional Details (VAD) rows to
    # catch mutations that fall on the same peptide region.
    for row in vad:
        row = row.decode().split('\t')
        row[-1] = row[-1].replace('\n', '')

        # Blank lines and '#' comment lines carry no variant data.
        if not row[0] or row[0].startswith('#'):
            continue

        if row[0].startswith('Input line'):
            # Header row: resolve the per-row column positions once instead
            # of searching the header list again for every data row.
            vad_headers = row
            chrom_col = vad_headers.index('Chromosome')
            pos_col = vad_headers.index('Position')
            change_col = vad_headers.index('Protein sequence change')
            input_col = vad_headers.index('Input line')
            continue

        genchrom = row[chrom_col]
        genpos = int(row[pos_col])
        aa_change = row[change_col]
        input_line = row[input_col]

        for peptide in proBED:
            pepseq = peptide[3]
            pepchrom = peptide[0]
            pepposA = int(peptide[1])
            pepposB = int(peptide[2])
            # Only variants located inside the peptide's genomic interval
            # (both ends inclusive) on the same chromosome are of interest.
            if genchrom != pepchrom or not pepposA <= genpos <= pepposB:
                continue

            strand = row[vad_headers.index('Strand')]
            transcript_strand = row[vad_headers.index('S.O. transcript strand')]

            # Calculates the position of the variant amino acid(s) on the
            # peptide: counted from the interval start when both strand
            # columns agree, then recomputed from the interval end when
            # either strand is '-' or the first estimate is past the end of
            # the peptide.
            # NOTE(review): the two tests are taken to be siblings (not
            # nested); verify against the repository copy.
            if transcript_strand == strand:
                aa_peppos = int(math.ceil((genpos - pepposA)/3.0) - 1)
            if (strand == '-' or transcript_strand == '-'
                    or aa_peppos >= len(pepseq)):
                aa_peppos = int(math.floor((pepposB - genpos)/3.0))

            # Record the offset once per (peptide, protein change) pair.
            offsets = pep_muts.setdefault(pepseq, {}).setdefault(aa_change, [])
            if aa_peppos not in offsets:
                offsets.append(aa_peppos)

            # Stores the intersect information by mapping Input Line
            # (CRAVAT output) to peptide sequence.
            matched_peptides = pep_map.setdefault(input_line, [])
            if pepseq not in matched_peptides:
                matched_peptides.append(pepseq)
            # TODO: Need to obtain strand information as
            # well i.e., positive (+) or negative (-)
262 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
263 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# NOTE(review): this section was recovered from an annotate view that drops
# leading whitespace, so the nesting below is reconstructed from the control
# flow -- confirm against the repository copy.


def _first_resolvable_transcript(all_transcripts, strip_version):
    """Find the first listed transcript for which getSequence returns a sequence.

    all_transcripts is the comma-separated 'S.O. all transcripts' cell; each
    entry is expected to look like '<transcript id>:<protein change>'.
    Entries that are empty or do not match that pattern are skipped.  When
    strip_version is true, everything from the first '.' of the transcript id
    onwards is removed before the lookup.

    Returns a (sequence, protein change) tuple, or (None, None) when no
    entry yields a sequence.
    """
    for transcript in all_transcripts.split(','):
        if not transcript:
            continue
        mat = SOtranscripts.search(transcript)
        if mat is None:
            continue
        candidate_id = mat.group(1)
        if strip_version:
            candidate_id = candidate_id.split('.')[0]
        sequence = getSequence(candidate_id)
        if sequence:
            return sequence, mat.group(2)
    return None, None


with open(output_filename, 'w') as tsv_file:
    # escapechar is left at its default: an empty string is rejected by the
    # csv module on Python 3.11+, and earlier versions treated it as unset.
    tsvout = csv.writer(tsv_file, delimiter='\t', quoting=csv.QUOTE_NONE)
    headers = []
    # Column indices of the VAD table whose header already occurs in the
    # Variant Result (VR) table; they are left out of every merged row.
    duplicate_indices = []

    # loops through each row in the Variant Result (VR) file
    for x, row in enumerate(variants):
        row = row.decode().split('\t')
        row[-1] = row[-1].replace('\n', '')
        # sets row_2 equal to the line with the same index in the Variant
        # Additional Details (VAD) file; the two files are assumed to be
        # line-aligned and an IndexError is raised if VAD is shorter.
        row_2 = vad[x].decode().split('\t')
        row_2[-1] = row_2[-1].replace('\n', '')

        # Blank lines are dropped; '#' comment lines are copied through.
        if not row[0]:
            continue
        if row[0].startswith('#'):
            tsvout.writerow(row)
            continue

        if row[0].startswith('Input line'):
            # Header row: start from the VR headers and append every VAD
            # header that is not already present.
            headers = row
            for i, value in enumerate(row_2):
                if value in headers:
                    duplicate_indices.append(i)
                else:
                    headers.append(value)
            # adds appropriate headers when proteomic input is supplied
            if probed_filename:
                headers.insert(n, 'Variant peptide')
                headers.insert(n, 'Reference peptide')
            tsvout.writerow(headers)
            continue

        # Data row: all VR cells followed by the non-duplicated VAD cells.
        cells = list(row)
        cells.extend(value for i, value in enumerate(row_2)
                     if i not in duplicate_indices)

        # Verifies the peptides intersected previously through sequences
        # returned by getSequence.
        if probed_filename:
            # Placeholders for 'Reference peptide' and 'Variant peptide'.
            cells.insert(n, '')
            cells.insert(n, '')
            input_line = cells[headers.index('Input line')]
            if input_line in pep_map:
                pepseq = pep_map[input_line][0]
                aa_changes = pep_muts[pepseq]
                transcript_id = cells[headers.index('S.O. transcript')]
                ref_fullseq = getSequence(transcript_id)

                # Checks the other S.O. transcripts if the primary S.O.
                # transcript has no sequence available: first with the ids
                # as listed, then again with the version extension removed.
                if not ref_fullseq:
                    all_transcripts = cells[headers.index('S.O. all transcripts')]
                    for strip_version in (False, True):
                        ref_fullseq, fallback_change = _first_resolvable_transcript(
                            all_transcripts, strip_version)
                        if ref_fullseq:
                            # Keep the peptide offset of the first recorded
                            # change but use the protein change reported for
                            # the transcript that resolved.  dict views are
                            # not indexable on Python 3, hence next(iter()).
                            first_offset = next(iter(aa_changes.values()))[0]
                            aa_changes = {fallback_change: [first_offset]}
                            break

                if ref_fullseq:
                    # Groups the amino acid changes by the protein position
                    # at which the peptide would have to start.
                    positions = {}
                    for aa_change in aa_changes:
                        m = reg_seq_change.search(aa_change)
                        if m is None:
                            # Not of the '<ref><position><alt>' form, so no
                            # protein position can be derived from it.
                            continue
                        aa_peppos = aa_changes[aa_change][0]
                        aa_startpos = int(m.group(2)) - aa_peppos - 1
                        positions.setdefault(aa_startpos, []).append(aa_change)

                    # Goes through the grouped changes to mutate the
                    # reference peptide (uses proBED peptide as a reference)
                    for pep_protpos in positions:
                        ref_seq = ref_fullseq[pep_protpos:pep_protpos+len(pepseq)]
                        mut_seq = ref_seq
                        for mut in positions[pep_protpos]:
                            m = reg_seq_change.search(mut)
                            ref_aa = m.group(1)
                            mut_pos = int(m.group(2))
                            alt_aa = m.group(3)
                            pep_mutpos = mut_pos - pep_protpos - 1
                            try:
                                ref_residue = ref_seq[pep_mutpos]
                                pep_residue = pepseq[pep_mutpos]
                            except IndexError:
                                # Offset lies outside the sequences: treat it
                                # as a failed match instead of crashing.
                                break
                            # The reference must carry the expected residue
                            # and the peptide either that residue or the
                            # alternate one; otherwise stop for this start.
                            if (ref_residue != ref_aa
                                    or pep_residue not in (alt_aa, ref_aa)):
                                break
                            mut_seq = (mut_seq[:pep_mutpos] + pep_residue
                                       + mut_seq[pep_mutpos+1:])
                        # Adds the mutated peptide and reference peptide if
                        # mutated correctly
                        if pepseq == mut_seq:
                            cells[n+1] = pepseq
                            cells[n] = ref_seq
        tsvout.writerow(cells)
390 |