changeset 48:265f5ae47a56 draft

Uploaded
author bornea
date Thu, 19 May 2016 11:58:57 -0400
parents 8ca1d3bc5906
children 6bb30aeb02bd
files SAINT_preprocessing.py
diffstat 1 files changed, 21 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/SAINT_preprocessing.py	Thu May 19 11:54:41 2016 -0400
+++ b/SAINT_preprocessing.py	Thu May 19 11:58:57 2016 -0400
@@ -238,29 +238,43 @@
 
 def no_error_inter(Scaffold_input):
     # Remake inter file without protein errors from Uniprot.
-    err = readtab("error proteins.txt")
+    err = readtab("./error_proteins.txt")
     bait = readtab(baitfile)
     data = read_Scaffold(Scaffold_input).data
     header = read_Scaffold(Scaffold_input).header
+    header = [MQ_var.replace(r"\"", "") for MQ_var in header]
+    header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header]
+    header = [MQ_var.replace(r".", r"-") for MQ_var in header]
     bait_index = []
-    for bait_line in bait:
-        bait_index.append(header.index(bait_line[0]))
+    for bait_item in bait:
+        bait_index.append(header.index(bait_item[0]))
     proteins = read_Scaffold(Scaffold_input).proteins
     errors = []
+    valid_prots = []
     for e in err:
         errors.append(e[0])
-    with open('inter.txt', 'w') as y:
+    for a in proteins:
+        a = a.replace("\n", "")
+        # Remove \n for input into function.
+        a = a.replace("\r", "")
+        # Ditto for \r.
+        seq = get_info(a).seqlength
+        GN = get_info(a).genename
+        if seq != 'NA':
+            if GN != 'NA':
+                valid_prots.append(a)
+    with open('inter.txt', 'w') as input_file:
         l = 0; a = 0
         for bb in bait:
             for lst in data:
-                if proteins[a] not in errors:
-                    y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t'
-                            + lst[bait_index[l]] + '\n')
+                if lst[0] in valid_prots:
+                    input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n')
                 a += 1
                 if a == len(proteins):
                     l += 1; a = 0
 
 
+
 def bait_check(bait, Scaffold_input): 
     # Check that bait names share Scaffold header titles.
     bait_in = readtab(bait)