Previous changeset 71:f6d916d1d304 (2016-08-27) Next changeset 73:50391fdc229a (2016-08-27) |
Commit message:
Uploaded |
modified:
mzID_process2.py |
b |
diff -r f6d916d1d304 -r 5ec0b997fb13 mzID_process2.py --- a/mzID_process2.py Sat Aug 27 23:22:20 2016 -0400 +++ b/mzID_process2.py Sat Aug 27 23:26:10 2016 -0400 |
[ |
b'@@ -6,7 +6,7 @@\n """\n #######################################################################################\n ## Description: ##\n-# This program will create inter, prey, and bait files from mzIdentML files\n+#This program will create inter, prey, and bait files from mzIdentML files\n ## Required input: ##\n # 1) mzIdentML file to be reformatted\n # 2) minimum PSM for quantification\n@@ -18,95 +18,95 @@\n ins_path = sys.argv[5]\n \n class ReturnValue1(object):\n- def __init__(self, sequence, gene):\n- self.seqlength = sequence\n- self.genename = gene\n+\tdef __init__(self, sequence, gene):\n+\t\tself.seqlength = sequence\n+\t\tself.genename = gene\n class ReturnValue2(object):\n- def __init__(self, inter, accessions):\n- self.inter = inter\n- self.accessions = accessions\n+\tdef __init__(self, inter, accessions):\n+\t\tself.inter = inter\n+\t\tself.accessions = accessions\n def read_tab(infile):\n- with open(infile,\'r\') as x:\n- output = []\n- for line in x:\n- line = line.strip()\n- temp = line.split(\'\\t\')\n- output.append(temp)\n- return output\n+\twith open(infile,\'r\') as x:\n+\t\toutput = []\n+\t\tfor line in x:\n+\t\t\tline = line.strip()\n+\t\t\ttemp = line.split(\'\\t\')\n+\t\t\toutput.append(temp)\n+\treturn output\n def printProgress (iteration, total, prefix = \'\', suffix = \'\', decimals = 1, barLength = 100):\n- """\n- Call in a loop to create terminal progress bar\n- @params:\n- iteration - Required : current iteration (Int)\n- total - Required : total iterations (Int)\n- prefix - Optional : prefix string (Str)\n- suffix - Optional : suffix string (Str)\n- decimals - Optional : positive number of decimals in percent complete (Int)\n- barLength - Optional : character length of bar (Int)\n- """\n- formatStr = "{0:." + str(decimals) + "f}"\n- percents = formatStr.format(100 * (iteration / float(total)))\n- filledLength = int(round(barLength * iteration / float(total)))\n- bar = \'=\' * filledLength + \'-\' * (barLength - filledLength)\n- sys.stdout.write(\'\\r%s |%s| %s%s %s\' % (prefix, bar, percents, \'%\', suffix)),\n- sys.stdout.flush()\n- if iteration == total:\n- sys.stdout.write(\'\\n\')\n- sys.stdout.flush()\n+\t"""\n+\tCall in a loop to create terminal progress bar\n+\t@params:\n+\t\titeration - Required : current iteration (Int)\n+\t\ttotal\t - Required : total iterations (Int)\n+\t\tprefix\t - Optional : prefix string (Str)\n+\t\tsuffix\t - Optional : suffix string (Str)\n+\t\tdecimals\t- Optional : positive number of decimals in percent complete (Int)\n+\t\tbarLength - Optional : character length of bar (Int)\n+\t"""\n+\tformatStr\t = "{0:." + str(decimals) + "f}"\n+\tpercents\t\t= formatStr.format(100 * (iteration / float(total)))\n+\tfilledLength\t= int(round(barLength * iteration / float(total)))\n+\tbar\t\t\t = \'=\' * filledLength + \'-\' * (barLength - filledLength)\n+\tsys.stdout.write(\'\\r%s |%s| %s%s %s\' % (prefix, bar, percents, \'%\', suffix)),\n+\tsys.stdout.flush()\n+\tif iteration == total:\n+\t\tsys.stdout.write(\'\\n\')\n+\t\tsys.stdout.flush()\n def get_info(uniprot_accession_in,fasta_db): \n- # Get aminoacid lengths and gene name.\n- error = open(\'error proteins.txt\', \'a+\')\n- data = open(fasta_db, \'r\')\n- data_lines = data.readlines()\n- db_len = len(data_lines)\n- seqlength = 0\n- count = 0\n- last_line = data_lines[-1]\n- for data_line in data_lines:\n- if ">sp" in data_line:\n- namer = data_line.split("|")[2]\n- if uniprot_accession_in == data_line.split("|")[1]:\n- match = count + 1\n- if \'GN=\' in data_line:\n- lst = data_line.split(\'GN=\')\n- lst2 = lst[1].split(\' \')\n- genename = lst2[0]\n- if \'GN=\' not in data_line:\n- genename = \'NA\'\n- while ">sp" not in data_lines[match]:\n- if match <= db_len:\n- '..b'match].strip())\n+\t\t\t\t\t\tif data_lines[match] == last_line:\n+\t\t\t\t\t\t\tbreak\n+\t\t\t\t\t\tmatch = match + 1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tbreak\n+\t\t\t\treturn ReturnValue1(seqlength, genename)\n+\t\tif uniprot_accession_in == namer.split(" ")[0]:\n+\t\t\tmatch = count + 1\n+\t\t\t# Ensures consistent spacing throughout.\n+\t\t\tif \'GN=\' in data_line:\n+\t\t\t\tlst = data_line.split(\'GN=\')\n+\t\t\t\tlst2 = lst[1].split(\' \')\n+\t\t\t\tgenename = lst2[0]\n+\t\t\tif \'GN=\' not in data_line:\n+\t\t\t\tgenename = \'NA\'\n+\t\t\twhile ">sp" not in data_lines[match]:\n+\t\t\t\tif match <= db_len:\n+\t\t\t\t\tseqlength = seqlength + len(data_lines[match].strip())\n+\t\t\t\t\tif data_lines[match] == last_line:\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tmatch = match + 1\n+\t\t\t\telse:\n+\t\t\t\t\tbreak\n+\t\t\treturn ReturnValue1(seqlength, genename)\n+\t\tcount = count + 1\n+\tif seqlength == 0:\n+\t\terror.write(uniprot_accession_in + \'\\t\' + "Uniprot not in Fasta" + \'\\n\')\n+\t\terror.close\n+\t\tseqlength = \'NA\'\n+\t\tgenename = \'NA\'\n+\t\treturn ReturnValue1(seqlength, genename)\n def make_inter(mzIdentML,replicate,grouping):\n \taccession_index = mzIdentML[0].index("accession")\n \tPSMs = {}\n@@ -135,7 +135,7 @@\n make_prey = sys.argv[3]\n db = sys.argv[4]\n if db == "None":\n- db = str(ins_path) + "/SwissProt_HUMAN_2015_12.fasta"\n+\tdb = str(ins_path) + "/SwissProt_HUMAN_2015_12.fasta"\n make_bait = sys.argv[6]\n bait_bool = sys.argv[7]\n prey_file = sys.argv[8]\n@@ -143,37 +143,37 @@\n inter_out = sys.argv[10]\n \n def bait_create(baits, infile):\n- # Verifies the Baits are valid in the Scaffold file and writes the Bait.txt.\n- baits = make_bait.split()\n- i = 0\n- bait_file_tmp = open("bait.txt", "w")\n- order = []\n- bait_cache = []\n- while i < len(baits):\n- if baits[i+2] == "true":\n- T_C = "C"\n- else:\n- T_C = "T"\n- bait_line = baits[i] + "\\t" + baits[i+1] + "\\t" + T_C + "\\n"\n- bait_cache.append(str(bait_line))\n- i = i + 3\n+\t# Verifies the Baits are valid in the Scaffold file and writes the Bait.txt.\n+\tbaits = make_bait.split()\n+\ti = 0\n+\tbait_file_tmp = open("bait.txt", "w")\n+\torder = []\n+\tbait_cache = []\n+\twhile i < len(baits):\n+\t\tif baits[i+2] == "true":\n+\t\t\tT_C = "C"\n+\t\telse:\n+\t\t\tT_C = "T"\n+\t\tbait_line = baits[i] + "\\t" + baits[i+1] + "\\t" + T_C + "\\n"\n+\t\tbait_cache.append(str(bait_line))\n+\t\ti = i + 3\n \n- for cache_line in bait_cache:\n- bait_file_tmp.write(cache_line)\n+\tfor cache_line in bait_cache:\n+\t\tbait_file_tmp.write(cache_line)\n \n- bait_file_tmp.close()\n+\tbait_file_tmp.close()\n \n if bait_bool == \'false\':\n- bait_create(make_bait, infile)\n- bait = "bait.txt"\n+\tbait_create(make_bait, infile)\n+\tbait = "bait.txt"\n else:\n- bait_temp_file = open(sys.argv[2], \'r\')\n- bait_cache = bait_temp_file.readlines()\n- bait_file_tmp = open("bait.txt", "wr")\n- for cache_line in bait_cache:\n- bait_file_tmp.write(cache_line)\n- bait_file_tmp.close()\n- bait = "bait.txt"\n+\tbait_temp_file = open(sys.argv[2], \'r\')\n+\tbait_cache = bait_temp_file.readlines()\n+\tbait_file_tmp = open("bait.txt", "wr")\n+\tfor cache_line in bait_cache:\n+\t\tbait_file_tmp.write(cache_line)\n+\tbait_file_tmp.close()\n+\tbait = "bait.txt"\n bait = read_tab("bait.txt")\n \n inter = ""\n@@ -184,9 +184,9 @@\n \tos.system(cmd)\n \tmzIdentML = read_tab("flat_mzIdentML.txt")\n \tinter = inter + make_inter(mzIdentML,bait[cnt][0],bait[cnt][1]).inter\n- print inter\n+\tprint inter\n \taccessions.append(make_inter(mzIdentML,bait[cnt][0],bait[cnt][1]).accessions)\n- print accessions\n+\tprint accessions\n \tcnt+=1\n \n with open("inter.txt","w") as x:\n@@ -205,11 +205,11 @@\n \tfor i in unique_accessions:\n \t\tprey = prey + i + "\\t" + str(get_info(i,db).seqlength) + "\\t" + get_info(i,db).genename + "\\n"\n \t\tstart+=1\n- printProgress(start, end)\n+\t\tprintProgress(start, end)\n \twith open("prey.txt","w") as x:\n \t\tx.write(prey)\n \n os.rename("bait.txt", bait_out)\n os.rename("inter.txt", inter_out)\n if str(prey_file) != "None": \n- os.rename("prey.txt", prey_file)\n\\ No newline at end of file\n+\tos.rename("prey.txt", prey_file)\n\\ No newline at end of file\n' |