Mercurial > repos > galaxyp > cravatool
annotate cravat_submit.py @ 0:83181dabeb90 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
author | galaxyp |
---|---|
date | Fri, 18 May 2018 13:25:29 -0400 |
parents | |
children |
rev | line source |
---|---|
0
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
1 import requests |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
2 import json |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
3 import time |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
4 import urllib |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
5 import sys |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
6 import csv |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
7 import re |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
8 import math |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
9 from difflib import SequenceMatcher |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
10 from xml.etree import ElementTree as ET |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
11 import sqlite3 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
12 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
# Positional command-line arguments, in order:
#   1  input_filename    file uploaded to CRAVAT as the 'inputfile' form field
#   2  input_select_bar  value sent as the 'analyses' form field
#   3  GRCh_build        genome build; 'GRCh37' switches the 'hg19' flag on
#   4  probed_filename   proBED file path, or the literal string 'None'
#   5  output_filename   file the script opens for writing its TSV output
#   6  file_3            local path for Gene_Level_Analysis.Result.tsv
#   7  file_4            local path for Variant_Non-coding.Result.tsv
#   8  file_5            local path for Input_Errors.Result.tsv
#
# If any of the eight arguments is missing, ALL of them fall back to the
# test fixtures below (values already read from argv are overwritten).
try:
    input_filename = sys.argv[1]
    input_select_bar = sys.argv[2]
    GRCh_build = sys.argv[3]
    probed_filename = sys.argv[4]
    output_filename = sys.argv[5]
    file_3 = sys.argv[6]
    file_4 = sys.argv[7]
    file_5 = sys.argv[8]
except IndexError:
    # Only a missing argument should trigger the fallback; a bare `except:`
    # would also swallow KeyboardInterrupt and SystemExit.
    # Filenames for testing.
    input_filename = 'test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf'
    probed_filename = 'test-data/[PepPointer].bed'
    input_select_bar = 'VEST'
    GRCh_build = 'GRCh38'
    output_filename = 'combined_variants.tsv'
    file_3 = 'test-results/Gene_Level_Analysis.tsv'
    file_4 = 'test-results/Variant_Non-coding.Result.tsv'
    file_5 = 'test-results/Input_Errors.Result.tsv'
    # NOTE(review): matches_filename is only defined on this fallback path;
    # confirm nothing reads it when real arguments are supplied.
    matches_filename = 'matches.tsv'
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
33 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
def getSequence(transcript_id, timeout=30):
    """Fetch a protein sequence from the Ensembl REST service.

    Requests ``/sequence/id/<transcript_id>`` as SeqXML with
    ``type=protein`` and returns the text of the first ``AAseq`` element
    found in the response.

    Args:
        transcript_id: identifier appended to the ``/sequence/id/`` path.
        timeout: seconds handed to ``requests.get``; without it a stalled
            connection would block the script indefinitely.

    Returns:
        The sequence text, or None when the HTTP response is not OK or the
        document contains no ``AAseq`` element.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            when the timeout expires.
    """
    server = 'http://rest.ensembl.org'
    ext = '/sequence/id/' + transcript_id + '?content-type=text/x-seqxml%2Bxml;multiple_sequences=1;type=protein'
    req = requests.get(server + ext, headers={"Content-Type": "text/plain"}, timeout=timeout)

    if not req.ok:
        return None

    root = ET.fromstring(req.content)
    # Only the first AAseq element is used, even though the query asks for
    # multiple_sequences=1.
    first_match = next(root.iter('AAseq'), None)
    if first_match is None:
        return None
    return first_match.text
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
45 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
46 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
write_header = True

# urlretrieve lives in urllib.request on Python 3 and directly in urllib on
# Python 2; resolve it once so the downloads below work on either.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

CRAVAT_SERVICE_URL = 'http://staging.cravat.us/CRAVAT/rest/service/'
CRAVAT_RESULTS_URL = 'http://staging.cravat.us/CRAVAT/results/'

# The 'hg19' form field is 'on' only when the input uses the GRCh37 build.
GRCh37hg19 = 'on' if GRCh_build == 'GRCh37' else 'off'

# Submit the input file to CRAVAT. The upload handle is opened in binary
# mode and closed as soon as the request has been sent.
with open(input_filename, 'rb') as upload:
    submit = requests.post(
        CRAVAT_SERVICE_URL + 'submit',
        files={'inputfile': upload},
        data={'email': 'znylund@insilico.us.com', 'analyses': input_select_bar, 'hg19': GRCh37hg19})

# Parse the submission response once and keep the job ID and initial status.
submission = json.loads(submit.text)
jobid = submission['jobid']
submitted = submission['status']

# NOTE(review): this handle is not used in the statements below; it is left
# open in case code further down the script reads from it -- confirm.
input_file = open(input_filename)

# Load the proBED file as a list of rows. The list stays empty when no
# proBED file was supplied (probed_filename == 'None'), so later loops over
# proBED iterate nothing instead of raising NameError.
proBED = []
if probed_filename != 'None':
    with open(probed_filename) as tsvin:
        tsvreader = csv.reader(tsvin, delimiter='\t')
        proBED = list(tsvreader)

# Poll the job status every two seconds until it reports 'Success'.
# NOTE(review): any other status keeps the loop running, so a job that ends
# in a failure state would be polled forever -- confirm the service's
# failure status values and bail out on them.
while True:
    check = requests.get(CRAVAT_SERVICE_URL + 'status', params={'jobid': jobid})
    job_state = json.loads(check.text)
    status = job_state['status']
    resultfileurl = job_state['resultfileurl']
    if status == 'Success':
        break
    time.sleep(2)

# Local names for the two result tables that are not taken from argv.
file_1 = 'Variant_Result.tsv'
file_2 = 'Additional_Details.tsv'

# Download the tabular results for this job.
for remote_name, local_path in (
        ('Variant.Result.tsv', file_1),
        ('Variant_Additional_Details.Result.tsv', file_2),
        ('Gene_Level_Analysis.Result.tsv', file_3),
        ('Variant_Non-coding.Result.tsv', file_4),
        ('Input_Errors.Result.tsv', file_5)):
    urlretrieve(CRAVAT_RESULTS_URL + jobid + '/' + remote_name, local_path)
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
98 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
99 #opens the Variant Result file and the Variant Additional Details file as csv readers, then opens the output file (galaxy) as a writer |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
100 with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as tsvout: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
101 tsvreader_2 = csv.reader(tsvin_2, delimiter='\t') |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
102 tsvout = csv.writer(tsvout, delimiter='\t') |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
103 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
104 headers = [] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
105 duplicate_indices = [] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
106 n = 12 #Index for proteogenomic column start |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
107 reg_seq_change = re.compile('([A-Z]+)(\d+)([A-Z]+)') |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
108 SOtranscripts = re.compile('([A-Z]+[\d\.]+):([A-Z]+\d+[A-Z]+)') |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
109 pep_muts = {} |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
110 pep_map = {} |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
111 rows = [] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
112 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
113 for row in tsvreader_2: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
114 if row != [] and row[0][0] != '#': |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
115 #checks if the row begins with input line |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
116 if row[0] == 'Input line': |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
117 vad_headers = row |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
118 else: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
119 # Initially screens through the output Variant Additional Details to catch mutations on same peptide region |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
120 genchrom = row[vad_headers.index('Chromosome')] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
121 genpos = int(row[vad_headers.index('Position')]) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
122 aa_change = row[vad_headers.index('Protein sequence change')] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
123 input_line = row[vad_headers.index('Input line')] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
124 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
125 for peptide in proBED: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
126 pepseq = peptide[3] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
127 pepchrom = peptide[0] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
128 pepposA = int(peptide[1]) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
129 pepposB = int(peptide[2]) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
130 if genchrom == pepchrom and pepposA <= genpos and genpos <= pepposB: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
131 strand = row[vad_headers.index('Strand')] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
132 transcript_strand = row[vad_headers.index('S.O. transcript strand')] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
133 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
134 # Calculates the position of the variant amino acid(s) on peptide |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
135 if transcript_strand == strand: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
136 aa_peppos = int(math.ceil((genpos - pepposA)/3.0) - 1) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
137 if strand == '-' or transcript_strand == '-' or aa_peppos >= len(pepseq): |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
138 aa_peppos = int(math.floor((pepposB - genpos)/3.0)) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
139 if pepseq in pep_muts: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
140 if aa_change not in pep_muts[pepseq]: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
141 pep_muts[pepseq][aa_change] = [aa_peppos] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
142 else: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
143 if aa_peppos not in pep_muts[pepseq][aa_change]: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
144 pep_muts[pepseq][aa_change].append(aa_peppos) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
145 else: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
146 pep_muts[pepseq] = {aa_change : [aa_peppos]} |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
147 # Stores the intersect information by mapping Input Line (CRAVAT output) to peptide sequence. |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
148 if input_line in pep_map: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
149 if pepseq not in pep_map[input_line]: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
150 pep_map[input_line].append(pepseq) |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
151 else: |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
152 pep_map[input_line] = [pepseq] |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
153 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
# Merges the Variant Result (VR) file and the Variant Additional Details (VAD) file
# row by row into the output TSV. When a proBED file was supplied, two extra columns
# ('Reference peptide', 'Variant peptide') are inserted at column index n.
# Relies on names assigned earlier in this script: file_1, file_2, output_filename,
# probed_filename, duplicate_indices, n, pep_map, pep_muts, SOtranscripts,
# reg_seq_change and getSequence.
# NOTE(review): 'wb' is the csv-writer mode for Python 2; Python 3 would need text mode
# with newline='' -- confirm the target interpreter before changing it.
with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as tsvout:
    tsvreader_1 = csv.reader(tsvin_1, delimiter='\t')
    tsvreader_2 = csv.reader(tsvin_2, delimiter='\t')
    # The file-handle name is rebound to the csv writer; the with-statement still closes the file.
    tsvout = csv.writer(tsvout, delimiter='\t')

    headers = []

    # Loops through each row in the VAD file
    for row in tsvreader_2:

        # Reads the matching row of the VR file (both files are consumed in lockstep).
        # next() is used instead of .next() so the call works on Python 2.6+ and 3.
        row_2 = next(tsvreader_1)

        # Empty rows and rows whose first cell starts with '#' are copied through unchanged.
        # startswith() avoids the IndexError that row[0][0] raised on an empty first cell.
        if not row or row[0].startswith('#'):
            tsvout.writerow(row)
        elif row[0] == 'Input line':
            # Header row: start with every VAD header ...
            headers.extend(row)
            # ... then add the VR headers that are not already present, remembering the
            # column indices of the repeated ones so their cells can be skipped later.
            for i, value in enumerate(row_2):
                if value in headers:
                    duplicate_indices.append(i)
                else:
                    headers.append(value)
            # Adds the peptide headers when proteomic input is supplied
            # (both inserted at n, so 'Reference peptide' ends up before 'Variant peptide').
            if probed_filename != 'None':
                headers.insert(n, 'Variant peptide')
                headers.insert(n, 'Reference peptide')
            tsvout.writerow(headers)
        else:
            # Data row: all VAD cells followed by the VR cells whose header was not a duplicate.
            cells = list(row)
            for i, value in enumerate(row_2):
                if i not in duplicate_indices:
                    cells.append(value)

            # Verifies the previously intersected peptides against the sequence returned by getSequence
            if probed_filename != 'None':
                # Placeholders for the 'Reference peptide' / 'Variant peptide' columns
                cells.insert(n, '')
                cells.insert(n, '')
                input_line = cells[headers.index('Input line')]
                if input_line in pep_map:
                    # Only the first peptide mapped to this input line is checked
                    pepseq = pep_map[input_line][0]
                    aa_changes = pep_muts[pepseq]
                    transcript_id = cells[headers.index('S.O. transcript')]
                    ref_fullseq = getSequence(transcript_id)

                    # If the primary S.O. transcript yields no sequence, try the entries of
                    # 'S.O. all transcripts': first with the ID exactly as matched, then again
                    # with everything after the first '.' removed from the ID.
                    if not ref_fullseq:
                        transcripts = cells[headers.index('S.O. all transcripts')]
                        for strip_extension in (False, True):
                            for transcript in transcripts.split(','):
                                if not transcript:
                                    continue
                                mat = SOtranscripts.search(transcript)
                                if mat is None:
                                    # Entry does not have the expected shape; skip instead of crashing
                                    continue
                                candidate_id = mat.group(1)
                                if strip_extension:
                                    candidate_id = candidate_id.split('.')[0]
                                ref_fullseq = getSequence(candidate_id)
                                if ref_fullseq:
                                    # Keep the peptide position of the first recorded change but use
                                    # the sequence change reported for the transcript that resolved.
                                    first_peppos = next(iter(aa_changes.values()))[0]
                                    aa_changes = {mat.group(2): [first_peppos]}
                                    break
                            if ref_fullseq:
                                break

                    if ref_fullseq:
                        # Groups the amino acid changes by the protein index at which the
                        # peptide would start (protein position - peptide position - 1).
                        positions = {}
                        for aa_change in aa_changes:
                            m = reg_seq_change.search(aa_change)
                            if m is None:
                                # Change string could not be parsed; nothing to verify for it
                                continue
                            aa_protpos = int(m.group(2))
                            aa_peppos = aa_changes[aa_change][0]
                            aa_startpos = aa_protpos - aa_peppos - 1
                            positions.setdefault(aa_startpos, []).append(aa_change)

                        # For each candidate start, mutates the reference slice and compares it
                        # with the proBED peptide.
                        for pep_protpos in positions:
                            ref_seq = ref_fullseq[pep_protpos:pep_protpos + len(pepseq)]
                            mut_seq = ref_seq
                            for mut in positions[pep_protpos]:
                                m = reg_seq_change.search(mut)
                                ref_aa = m.group(1)
                                mut_pos = int(m.group(2))
                                alt_aa = m.group(3)
                                pep_mutpos = mut_pos - pep_protpos - 1
                                # NOTE(review): a negative pep_mutpos indexes from the end of the
                                # strings, exactly as before -- confirm whether that is intended.
                                try:
                                    ref_residue = ref_seq[pep_mutpos]
                                    pep_residue = pepseq[pep_mutpos]
                                except IndexError:
                                    # Position falls outside the slice/peptide: treat as a mismatch
                                    break
                                if ref_residue == ref_aa and pep_residue in (alt_aa, ref_aa):
                                    # The peptide carries either the reference or the alternate residue
                                    replacement = ref_aa if pep_residue == ref_aa else alt_aa
                                    mut_seq = mut_seq[:pep_mutpos] + replacement + mut_seq[pep_mutpos + 1:]
                                else:
                                    break
                            # Adds the variant peptide and reference peptide if the mutated
                            # reference reproduces the proBED peptide
                            if pepseq == mut_seq:
                                cells[n + 1] = pepseq
                                cells[n] = ref_seq
            tsvout.writerow(cells)
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
269 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
270 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
271 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
272 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
273 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
274 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
275 #a = 'col1\tcol2\tcol3' |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
276 #header_list = a.split('\t') |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
277 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
278 #loop through the two results; when you first hit a header, print out the headers in tabular form |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
279 #Print out each header only once |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
280 #Combine both headers into one output file |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
281 #loop through the rest of the data and assign each value to its corresponding header |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
282 #combine this all into one output file |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
283 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
284 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
285 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
286 |
83181dabeb90
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3
galaxyp
parents:
diff
changeset
|
287 |