comparison SAINT_preprocessing.py @ 11:b688d0dae86b draft

Uploaded
author bornea
date Tue, 12 Apr 2016 12:54:38 -0400
parents 1b0547d3c7bc
children 3e1b66d58f94
comparison
equal deleted inserted replaced
10:1b0547d3c7bc 11:b688d0dae86b
23 # 5) make_bait: String of bait names, assignment, and test or control boolean 23 # 5) make_bait: String of bait names, assignment, and test or control boolean
24 ####################################################################################### 24 #######################################################################################
25 25
26 import sys 26 import sys
27 import os.path 27 import os.path
28 import re
28 29
29 30
30 infile = sys.argv[1] 31 infile = sys.argv[1]
31 #Scaffold "Samples Report" output. 32 #Scaffold "Samples Report" output.
32 prey = sys.argv[2] 33 prey = sys.argv[2]
195 # Cut off blank line and END OF FILE. 196 # Cut off blank line and END OF FILE.
196 proteins = [] 197 proteins = []
197 for Scaffold_line in data: 198 for Scaffold_line in data:
198 Scaffold_line[4] = Scaffold_line[4].split()[0] 199 Scaffold_line[4] = Scaffold_line[4].split()[0]
199 # Removes the (+##) that sometimes is attached. 200 # Removes the (+##) that sometimes is attached.
200 for protein in data: 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
201 proteins.append(protein[prot_start]) 202 for protein in data:
203 prot_id = uniprot_re.match(protein[prot_start])
204 proteins.append(prot_id.group())
202 return ReturnValue2(data, proteins, header) 205 return ReturnValue2(data, proteins, header)
203 206
204 207
205 def make_inter(Scaffold_input): 208 def make_inter(Scaffold_input):
206 bait = readtab(baitfile) 209 bait = readtab(baitfile)