# HG changeset patch # User bornea # Date 1463673537 14400 # Node ID 265f5ae47a56d8f1447e96697f01e3534cd7e374 # Parent 8ca1d3bc590648499d2eaae596b15aaa180a13d5 Uploaded diff -r 8ca1d3bc5906 -r 265f5ae47a56 SAINT_preprocessing.py --- a/SAINT_preprocessing.py Thu May 19 11:54:41 2016 -0400 +++ b/SAINT_preprocessing.py Thu May 19 11:58:57 2016 -0400 @@ -238,29 +238,43 @@ def no_error_inter(Scaffold_input): # Remake inter file without protein errors from Uniprot. - err = readtab("error proteins.txt") + err = readtab("./error_proteins.txt") bait = readtab(baitfile) data = read_Scaffold(Scaffold_input).data header = read_Scaffold(Scaffold_input).header + header = [MQ_var.replace(r"\"", "") for MQ_var in header] + header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header] + header = [MQ_var.replace(r".", r"-") for MQ_var in header] bait_index = [] - for bait_line in bait: - bait_index.append(header.index(bait_line[0])) + for bait_item in bait: + bait_index.append(header.index(bait_item[0])) proteins = read_Scaffold(Scaffold_input).proteins errors = [] + valid_prots = [] for e in err: errors.append(e[0]) - with open('inter.txt', 'w') as y: + for a in proteins: + a = a.replace("\n", "") + # Remove \n for input into function. + a = a.replace("\r", "") + # Ditto for \r. + seq = get_info(a).seqlength + GN = get_info(a).genename + if seq != 'NA': + if GN != 'NA': + valid_prots.append(a) + with open('inter.txt', 'w') as input_file: l = 0; a = 0 for bb in bait: for lst in data: - if proteins[a] not in errors: - y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t' - + lst[bait_index[l]] + '\n') + if lst[0] in valid_prots: + input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n') a += 1 if a == len(proteins): l += 1; a = 0 + def bait_check(bait, Scaffold_input): # Check that bait names share Scaffold header titles. bait_in = readtab(bait)