Repository 'saint_preprocessing'
hg clone https://toolshed.g2.bx.psu.edu/repos/bornea/saint_preprocessing

Changeset 72:5ec0b997fb13 (2016-08-27)
Previous changeset 71:f6d916d1d304 (2016-08-27) Next changeset 73:50391fdc229a (2016-08-27)
Commit message:
Uploaded
modified:
mzID_process2.py
b
diff -r f6d916d1d304 -r 5ec0b997fb13 mzID_process2.py
--- a/mzID_process2.py Sat Aug 27 23:22:20 2016 -0400
+++ b/mzID_process2.py Sat Aug 27 23:26:10 2016 -0400
[
b'@@ -6,7 +6,7 @@\n """\n #######################################################################################\n ## Description: ##\n-#    This program will create inter, prey, and bait files from mzIdentML files\n+#This program will create inter, prey, and bait files from mzIdentML files\n ## Required input: ##\n # 1) mzIdentML file to be reformatted\n # 2) minimum PSM for quantification\n@@ -18,95 +18,95 @@\n ins_path = sys.argv[5]\n \n class ReturnValue1(object):\n-    def __init__(self, sequence, gene):\n-        self.seqlength = sequence\n-        self.genename = gene\n+\tdef __init__(self, sequence, gene):\n+\t\tself.seqlength = sequence\n+\t\tself.genename = gene\n class ReturnValue2(object):\n-    def __init__(self, inter, accessions):\n-        self.inter = inter\n-        self.accessions = accessions\n+\tdef __init__(self, inter, accessions):\n+\t\tself.inter = inter\n+\t\tself.accessions = accessions\n def read_tab(infile):\n-    with open(infile,\'r\') as x:\n-        output = []\n-        for line in x:\n-            line = line.strip()\n-            temp = line.split(\'\\t\')\n-            output.append(temp)\n-    return output\n+\twith open(infile,\'r\') as x:\n+\t\toutput = []\n+\t\tfor line in x:\n+\t\t\tline = line.strip()\n+\t\t\ttemp = line.split(\'\\t\')\n+\t\t\toutput.append(temp)\n+\treturn output\n def printProgress (iteration, total, prefix = \'\', suffix = \'\', decimals = 1, barLength = 100):\n-    """\n-    Call in a loop to create terminal progress bar\n-    @params:\n-        iteration   - Required  : current iteration (Int)\n-        total       - Required  : total iterations (Int)\n-        prefix      - Optional  : prefix string (Str)\n-        suffix      - Optional  : suffix string (Str)\n-        decimals    - Optional  : positive number of decimals in percent complete (Int)\n-        barLength   - Optional  : character length of bar (Int)\n-    """\n-    formatStr       = "{0:." + str(decimals) + "f}"\n-    percents        = formatStr.format(100 * (iteration / float(total)))\n-    filledLength    = int(round(barLength * iteration / float(total)))\n-    bar             = \'=\' * filledLength + \'-\' * (barLength - filledLength)\n-    sys.stdout.write(\'\\r%s |%s| %s%s %s\' % (prefix, bar, percents, \'%\', suffix)),\n-    sys.stdout.flush()\n-    if iteration == total:\n-        sys.stdout.write(\'\\n\')\n-        sys.stdout.flush()\n+\t"""\n+\tCall in a loop to create terminal progress bar\n+\t@params:\n+\t\titeration   - Required  : current iteration (Int)\n+\t\ttotal\t   - Required  : total iterations (Int)\n+\t\tprefix\t  - Optional  : prefix string (Str)\n+\t\tsuffix\t  - Optional  : suffix string (Str)\n+\t\tdecimals\t- Optional  : positive number of decimals in percent complete (Int)\n+\t\tbarLength   - Optional  : character length of bar (Int)\n+\t"""\n+\tformatStr\t   = "{0:." + str(decimals) + "f}"\n+\tpercents\t\t= formatStr.format(100 * (iteration / float(total)))\n+\tfilledLength\t= int(round(barLength * iteration / float(total)))\n+\tbar\t\t\t = \'=\' * filledLength + \'-\' * (barLength - filledLength)\n+\tsys.stdout.write(\'\\r%s |%s| %s%s %s\' % (prefix, bar, percents, \'%\', suffix)),\n+\tsys.stdout.flush()\n+\tif iteration == total:\n+\t\tsys.stdout.write(\'\\n\')\n+\t\tsys.stdout.flush()\n def get_info(uniprot_accession_in,fasta_db): \n-    # Get aminoacid lengths and gene name.\n-    error = open(\'error proteins.txt\', \'a+\')\n-    data = open(fasta_db, \'r\')\n-    data_lines = data.readlines()\n-    db_len = len(data_lines)\n-    seqlength = 0\n-    count = 0\n-    last_line = data_lines[-1]\n-    for data_line in data_lines:\n-        if ">sp" in data_line:\n-            namer = data_line.split("|")[2]\n-            if uniprot_accession_in == data_line.split("|")[1]:\n-                match = count + 1\n-                if \'GN=\' in data_line:\n-                    lst = data_line.split(\'GN=\')\n-                    lst2 = lst[1].split(\' \')\n-                    genename = lst2[0]\n-                if \'GN=\' not in data_line:\n-                    genename = \'NA\'\n-                while ">sp" not in data_lines[match]:\n-                    if match <= db_len:\n-           '..b'match].strip())\n+\t\t\t\t\t\tif data_lines[match] == last_line:\n+\t\t\t\t\t\t\tbreak\n+\t\t\t\t\t\tmatch = match + 1\n+\t\t\t\t\telse:\n+\t\t\t\t\t\tbreak\n+\t\t\t\treturn ReturnValue1(seqlength, genename)\n+\t\tif uniprot_accession_in == namer.split(" ")[0]:\n+\t\t\tmatch = count + 1\n+\t\t\t# Ensures consistent spacing throughout.\n+\t\t\tif \'GN=\' in data_line:\n+\t\t\t\tlst = data_line.split(\'GN=\')\n+\t\t\t\tlst2 = lst[1].split(\' \')\n+\t\t\t\tgenename = lst2[0]\n+\t\t\tif \'GN=\' not in data_line:\n+\t\t\t\tgenename = \'NA\'\n+\t\t\twhile ">sp" not in data_lines[match]:\n+\t\t\t\tif match <= db_len:\n+\t\t\t\t\tseqlength = seqlength + len(data_lines[match].strip())\n+\t\t\t\t\tif data_lines[match] == last_line:\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tmatch = match + 1\n+\t\t\t\telse:\n+\t\t\t\t\tbreak\n+\t\t\treturn ReturnValue1(seqlength, genename)\n+\t\tcount = count + 1\n+\tif seqlength == 0:\n+\t\terror.write(uniprot_accession_in + \'\\t\' + "Uniprot not in Fasta" + \'\\n\')\n+\t\terror.close\n+\t\tseqlength = \'NA\'\n+\t\tgenename = \'NA\'\n+\t\treturn ReturnValue1(seqlength, genename)\n def make_inter(mzIdentML,replicate,grouping):\n \taccession_index = mzIdentML[0].index("accession")\n \tPSMs = {}\n@@ -135,7 +135,7 @@\n make_prey = sys.argv[3]\n db = sys.argv[4]\n if db == "None":\n-    db = str(ins_path)  + "/SwissProt_HUMAN_2015_12.fasta"\n+\tdb = str(ins_path)  + "/SwissProt_HUMAN_2015_12.fasta"\n make_bait = sys.argv[6]\n bait_bool = sys.argv[7]\n prey_file = sys.argv[8]\n@@ -143,37 +143,37 @@\n inter_out = sys.argv[10]\n \n def bait_create(baits, infile):\n-    # Verifies the Baits are valid in the Scaffold file and writes the Bait.txt.\n-    baits = make_bait.split()\n-    i = 0\n-    bait_file_tmp = open("bait.txt", "w")\n-    order = []\n-    bait_cache = []\n-    while i < len(baits):\n-        if baits[i+2] == "true":\n-            T_C = "C"\n-        else:\n-            T_C = "T"\n-        bait_line = baits[i] + "\\t" + baits[i+1] + "\\t" + T_C + "\\n"\n-        bait_cache.append(str(bait_line))\n-        i = i + 3\n+\t# Verifies the Baits are valid in the Scaffold file and writes the Bait.txt.\n+\tbaits = make_bait.split()\n+\ti = 0\n+\tbait_file_tmp = open("bait.txt", "w")\n+\torder = []\n+\tbait_cache = []\n+\twhile i < len(baits):\n+\t\tif baits[i+2] == "true":\n+\t\t\tT_C = "C"\n+\t\telse:\n+\t\t\tT_C = "T"\n+\t\tbait_line = baits[i] + "\\t" + baits[i+1] + "\\t" + T_C + "\\n"\n+\t\tbait_cache.append(str(bait_line))\n+\t\ti = i + 3\n \n-    for cache_line in bait_cache:\n-        bait_file_tmp.write(cache_line)\n+\tfor cache_line in bait_cache:\n+\t\tbait_file_tmp.write(cache_line)\n \n-    bait_file_tmp.close()\n+\tbait_file_tmp.close()\n \n if bait_bool == \'false\':\n-    bait_create(make_bait, infile)\n-    bait = "bait.txt"\n+\tbait_create(make_bait, infile)\n+\tbait = "bait.txt"\n else:\n-    bait_temp_file = open(sys.argv[2], \'r\')\n-    bait_cache = bait_temp_file.readlines()\n-    bait_file_tmp = open("bait.txt", "wr")\n-    for cache_line in bait_cache:\n-        bait_file_tmp.write(cache_line)\n-    bait_file_tmp.close()\n-    bait = "bait.txt"\n+\tbait_temp_file = open(sys.argv[2], \'r\')\n+\tbait_cache = bait_temp_file.readlines()\n+\tbait_file_tmp = open("bait.txt", "wr")\n+\tfor cache_line in bait_cache:\n+\t\tbait_file_tmp.write(cache_line)\n+\tbait_file_tmp.close()\n+\tbait = "bait.txt"\n bait = read_tab("bait.txt")\n \n inter = ""\n@@ -184,9 +184,9 @@\n \tos.system(cmd)\n \tmzIdentML = read_tab("flat_mzIdentML.txt")\n \tinter = inter + make_inter(mzIdentML,bait[cnt][0],bait[cnt][1]).inter\n-    print inter\n+\tprint inter\n \taccessions.append(make_inter(mzIdentML,bait[cnt][0],bait[cnt][1]).accessions)\n-    print accessions\n+\tprint accessions\n \tcnt+=1\n \n with open("inter.txt","w") as x:\n@@ -205,11 +205,11 @@\n \tfor i in unique_accessions:\n \t\tprey = prey + i + "\\t" + str(get_info(i,db).seqlength) + "\\t" + get_info(i,db).genename + "\\n"\n \t\tstart+=1\n-        printProgress(start, end)\n+\t\tprintProgress(start, end)\n \twith open("prey.txt","w") as x:\n \t\tx.write(prey)\n \n os.rename("bait.txt", bait_out)\n os.rename("inter.txt", inter_out)\n if str(prey_file) != "None": \n-    os.rename("prey.txt", prey_file)\n\\ No newline at end of file\n+\tos.rename("prey.txt", prey_file)\n\\ No newline at end of file\n'