Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 11:b688d0dae86b draft
Uploaded
| author | bornea |
|---|---|
| date | Tue, 12 Apr 2016 12:54:38 -0400 |
| parents | 1b0547d3c7bc |
| children | 3e1b66d58f94 |
comparison
equal
deleted
inserted
replaced
| 10:1b0547d3c7bc | 11:b688d0dae86b |
|---|---|
| 23 # 5) make_bait: String of bait names, assignment, and test or control boolean | 23 # 5) make_bait: String of bait names, assignment, and test or control boolean |
| 24 ####################################################################################### | 24 ####################################################################################### |
| 25 | 25 |
| 26 import sys | 26 import sys |
| 27 import os.path | 27 import os.path |
| | 28 import re |
| 28 | 29 |
| 29 | 30 |
| 30 infile = sys.argv[1] | 31 infile = sys.argv[1] |
| 31 #Scaffold "Samples Report" output. | 32 #Scaffold "Samples Report" output. |
| 32 prey = sys.argv[2] | 33 prey = sys.argv[2] |
| 195 # Cut off blank line and END OF FILE. | 196 # Cut off blank line and END OF FILE. |
| 196 proteins = [] | 197 proteins = [] |
| 197 for Scaffold_line in data: | 198 for Scaffold_line in data: |
| 198 Scaffold_line[4] = Scaffold_line[4].split()[0] | 199 Scaffold_line[4] = Scaffold_line[4].split()[0] |
| 199 # Removes the (+##) that sometimes is attached. | 200 # Removes the (+##) that sometimes is attached. |
| 200 for protein in data: | 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") |
| 201 proteins.append(protein[prot_start]) | 202 for protein in data: |
| | 203 prot_id = uniprot_re.match(protein[prot_start]) |
| | 204 proteins.append(prot_id.group()) |
| 202 return ReturnValue2(data, proteins, header) | 205 return ReturnValue2(data, proteins, header) |
| 203 | 206 |
| 204 | 207 |
| 205 def make_inter(Scaffold_input): | 208 def make_inter(Scaffold_input): |
| 206 bait = readtab(baitfile) | 209 bait = readtab(baitfile) |
