Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/misc/Utils.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 # | |
| 2 # Copyright INRA-URGI 2009-2010 | |
| 3 # | |
| 4 # This software is governed by the CeCILL license under French law and | |
| 5 # abiding by the rules of distribution of free software. You can use, | |
| 6 # modify and/ or redistribute the software under the terms of the CeCILL | |
| 7 # license as circulated by CEA, CNRS and INRIA at the following URL | |
| 8 # "http://www.cecill.info". | |
| 9 # | |
| 10 # As a counterpart to the access to the source code and rights to copy, | |
| 11 # modify and redistribute granted by the license, users are provided only | |
| 12 # with a limited warranty and the software's author, the holder of the | |
| 13 # economic rights, and the successive licensors have only limited | |
| 14 # liability. | |
| 15 # | |
| 16 # In this respect, the user's attention is drawn to the risks associated | |
| 17 # with loading, using, modifying and/or developing or reproducing the | |
| 18 # software by the user in light of its specific status of free software, | |
| 19 # that may mean that it is complicated to manipulate, and that also | |
| 20 # therefore means that it is reserved for developers and experienced | |
| 21 # professionals having in-depth computer knowledge. Users are therefore | |
| 22 # encouraged to load and test the software's suitability as regards their | |
| 23 # requirements in conditions enabling the security of their systems and/or | |
| 24 # data to be ensured and, more generally, to use and operate it in the | |
| 25 # same conditions as regards security. | |
| 26 # | |
| 27 # The fact that you are presently reading this means that you have had | |
| 28 # knowledge of the CeCILL license and that you accept its terms. | |
| 29 # | |
| 30 """Some useful functions""" | |
| 31 | |
| 32 import sys, os | |
| 33 import random | |
| 34 import subprocess | |
| 35 | |
| 36 | |
def writeFile(fileName, content):
    """
    Write a string to a file, replacing any previous content
    @param fileName: path of the file to write
    @type  fileName: string
    @param content: the text to write
    @type  content: string
    """
    # The context manager closes the handle even when the write fails.
    with open(fileName, "w") as handle:
        handle.write(content)
| 44 | |
def sumOfLists(list1, list2):
    """
    Add two lists of the same size, position by position
    @param list1: a list
    @type  list1: list
    @param list2: another list, of the same size
    @type  list2: list
    @return: the list of the pairwise sums (the program exits if the sizes differ)
    """
    sameSize = (len(list1) == len(list2))
    if not sameSize:
        sys.exit("Cannot sum list whose sizes are different!")
    return [left + right for left, right in zip(list1, list2)]
| 52 | |
| 53 | |
def protectBackslashes(string):
    """
    Double every backslash of a string (typically a path)
    @param string: the text to protect
    @type  string: string
    @return: the text where each backslash is replaced by two backslashes
    """
    backslash = "\\"
    # Cutting on each backslash and gluing back with two of them doubles them all.
    pieces = string.split(backslash)
    return (backslash * 2).join(pieces)
| 59 | |
| 60 | |
def getHammingDistance(string1, string2):
    """
    Count the positions at which two strings of the same size differ
    @param string1: a string
    @type  string1: string
    @param string2: another string, of the same size
    @type  string2: string
    @return: the number of mismatching positions (an Exception is raised if the sizes differ)
    """
    if len(string1) != len(string2):
        raise Exception("Error, size of %s and %s differ" % (string1, string2))
    nbMismatches = 0
    for symbol1, symbol2 in zip(string1, string2):
        if symbol1 != symbol2:
            nbMismatches += 1
    return nbMismatches
| 68 | |
| 69 | |
def getLevenshteinDistance(string1, string2):
    """
    Compute Levenshtein distance between two strings
    @param string1: a string
    @type  string1: string
    @param string2: another string
    @type  string2: string
    @return: the minimal number of insertions, deletions and substitutions turning one string into the other
    """
    # Make string1 the longer one, so that the rows (indexed on string2) stay short.
    if len(string1) < len(string2):
        return getLevenshteinDistance(string2, string1)
    # string1 is the longer string: if it is empty, string2 is empty too.
    if not string1:
        return len(string2)
    # A plain list (not xrange) so that the code runs on Python 2 and Python 3 alike.
    previousRow = list(range(len(string2) + 1))
    for i, c1 in enumerate(string1):
        currentRow = [i + 1]
        for j, c2 in enumerate(string2):
            insertions = previousRow[j + 1] + 1
            deletions = currentRow[j] + 1
            # The comparison adds 1 only when the two characters differ.
            substitutions = previousRow[j] + (c1 != c2)
            currentRow.append(min(insertions, deletions, substitutions))
        previousRow = currentRow
    return previousRow[-1]
| 88 | |
| 89 | |
def getMinAvgMedMax(values):
    """
    Get some stats about a distribution
    @param values: a distribution (the value being the number of occurrences of the key)
    @type  values: dict int to int
    @return: a tuple (minimum, average, median, maximum) computed on the keys, weighted by their numbers of occurrences
    """
    minValues = min(values)
    maxValues = max(values)
    sumValues = sum(key * count for key, count in values.items())
    nbValues = sum(values.values())
    # Expand the distribution into the sorted list of all observations.
    sortedValues = []
    for key in sorted(values):
        sortedValues.extend([key] * values[key])
    # Floor division keeps the index an integer on Python 3 as well.
    middle = nbValues // 2
    if nbValues % 2 == 0:
        medValues = (sortedValues[middle - 1] + sortedValues[middle]) / 2.0
    else:
        medValues = sortedValues[middle]
    return (minValues, float(sumValues) / nbValues, medValues, maxValues)
| 112 | |
| 113 | |
def xor(value1, value2):
    """
    Logical exclusive or of the truth values of two objects
    @param value1: a value
    @type  value1: anything
    @param value2: a value
    @type  value2: anything
    @return: True if exactly one of the two values is true, False otherwise
    """
    if value1:
        return not value2
    return bool(value2)
| 123 | |
| 124 | |
def diff(fileName1, fileName2):
    """
    Compare two files line by line
    @param fileName1: a file name
    @type  fileName1: string
    @param fileName2: another file name
    @type  fileName2: string
    @return: True if the files contain the same lines, False otherwise (the first difference found is printed)
    @rtype:  bool
    """
    # Context managers close both handles, whatever happens while reading.
    with open(fileName1) as handle1:
        lines1 = handle1.readlines()
    with open(fileName2) as handle2:
        lines2 = handle2.readlines()
    if len(lines1) != len(lines2):
        print("Sizes of files differ (%d != %d)" % (len(lines1), len(lines2)))
        return False
    # The line number reported below is 0-based.
    for i, (line1, line2) in enumerate(zip(lines1, lines2)):
        if line1 != line2:
            print("Line %d differ ('%s' != '%s')" % (i, line1.strip(), line2.strip()))
            return False
    return True
| 146 | |
| 147 | |
def binomialCoefficient(a, b):
    """
    Compute the binomial coefficient "a among b", as a float
    @param a: the number of elements chosen
    @type  a: int
    @param b: the total number of elements
    @type  b: int
    @return: (b-a+1) * ... * b divided by 1 * ... * a
    @rtype:  float
    """
    # Choosing a elements or leaving out a elements gives the same count: use the shorter products.
    if a > b / 2:
        a = b - a
    numerator = 1.0
    denominator = 1.0
    # Both products have the same number of factors, so they advance together.
    for rank in range(1, a + 1):
        numerator *= b - a + rank
        denominator *= rank
    return numerator / denominator
| 165 | |
| 166 | |
# Cache of the p-values already computed by fisherExactPValue,
# indexed by the (a, b, c, d) tuple of its arguments.
memory = dict()
| 168 | |
| 169 # def fisherExactPValue(a, b, c, d): | |
| 170 # """ | |
| 171 # P-value of Fisher exact test for 2x2 contingency table | |
| 172 # """ | |
| 173 # if (a, b, c, d) in memory: | |
| 174 # return memory[(a, b, c, d)] | |
| 175 | |
| 176 # n = a + b + c + d | |
| 177 # i1 = binomialCoefficient(a, a+b) | |
| 178 # i2 = binomialCoefficient(c, a+c) | |
| 179 # i3 = binomialCoefficient(c+d, n) | |
| 180 # pValue = i1 * i2 / i3 | |
| 181 | |
| 182 # memory[(a, b, c, d)] = pValue | |
| 183 | |
| 184 # return pValue | |
| 185 | |
| 186 | |
def fisherExactPValue(a, b, c, d):
    """
    P-value of Fisher exact test for a 2x2 contingency table, computed by R
    A temporary R script is written in the current directory and run with "R CMD BATCH"
    (the R executable can be changed with the SMARTRPATH environment variable).
    The results are stored in the module-level "memory" cache.
    The program exits if R fails or if no p-value is found in its output.
    @param a: first count of the table
    @type  a: int
    @param b: second count of the table
    @type  b: int
    @param c: third count of the table
    @type  c: int
    @param d: fourth count of the table
    @type  d: int
    @return: the p-value read from the R output
    @rtype:  float
    """
    key = (a, b, c, d)
    if key in memory:
        return memory[key]

    scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))
    # The script must be closed (hence flushed) before R reads it.
    with open(scriptFileName, "w") as rScript:
        rScript.write("data = matrix(c(%d, %d, %d, %d), nr=2)\n" % (a, b, c, d))
        rScript.write("fisher.test(data)\n")

    rCommand = os.environ.get("SMARTRPATH", "R")
    command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
    status = subprocess.call(command, shell=True)

    if status != 0:
        sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))

    # The output file is the script file name followed by "out" (".R" -> ".Rout").
    outputRFileName = "%sout" % (scriptFileName)
    pValue = None
    pValueTag = "p-value "
    # Closing the output before removing it avoids leaking the handle
    # (and removing a file that is still open fails on some platforms).
    with open(outputRFileName) as outputRFile:
        for line in outputRFile:
            line = line.strip()
            if line == "":
                continue
            for splittedLine in line.split(","):
                splittedLine = splittedLine.strip()
                if splittedLine.startswith(pValueTag):
                    # The value is the last word, after "=" or "<".
                    pValue = float(splittedLine.split()[-1])
                    break

    if pValue is None:
        # The temporary files are kept here, so that they can be inspected.
        sys.exit("Problem with the cannot find p-value! File %s, values are: %d, %d, %d, %d" % (scriptFileName, a, b, c, d))

    os.remove(scriptFileName)
    os.remove(outputRFileName)

    memory[key] = pValue

    return pValue
| 229 | |
| 230 | |
def fisherExactPValueBulk(list):
    """
    P-values of Fisher exact tests for several 2x2 contingency tables, computed by a single run of R
    A temporary R script is written in the current directory and run with "R CMD BATCH"
    (the R executable can be changed with the SMARTRPATH environment variable).
    The program exits if R fails or if the number of p-values found does not match the number of tables.
    @param list: the tables, each one being a sequence of four counts; the first two
                 elements of each table (element[0:2]) are used as key of the result,
                 so this slice must be hashable (e.g. the tables are tuples)
    @type  list: list of tuples
    @return: the p-values, indexed by the first two counts of each table
    @rtype:  dict tuple to float
    """
    # Nothing to compute: do not start R for an empty script.
    if len(list) == 0:
        return {}

    scriptFileName = "tmpScript-%d.R" % (random.randint(0, 10000))
    # The script must be closed (hence flushed) before R reads it.
    with open(scriptFileName, "w") as rScript:
        for element in list:
            rScript.write("fisher.test(matrix(c(%d, %d, %d, %d), nr=2))$p.value\n" % (int(element[0]), int(element[1]), int(element[2]), int(element[3])))

    rCommand = os.environ.get("SMARTRPATH", "R")
    command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
    status = subprocess.call(command, shell=True)

    if status != 0:
        sys.exit("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))

    # The output file is the script file name followed by "out" (".R" -> ".Rout").
    outputRFileName = "%sout" % (scriptFileName)
    pValue = None
    pValueTag = "[1] "
    results = {}
    cpt = 0
    # Closing the output before removing it avoids leaking the handle
    # (and removing a file that is still open fails on some platforms).
    with open(outputRFileName) as outputRFile:
        for line in outputRFile:
            line = line.strip()
            if line == "":
                continue
            if line.startswith(pValueTag):
                # More results than tables: report it instead of failing on the index.
                if cpt >= len(list):
                    sys.exit("Error in the number of p-values computed by R in file '%s'!" % (scriptFileName))
                pValue = float(line.split()[-1])
                # The p-values are printed in the order of the tables.
                results[list[cpt][0:2]] = pValue
                cpt += 1

    if pValue is None:
        sys.exit("Problem with the cannot find p-value!")
    if cpt != len(list):
        sys.exit("Error in the number of p-values computed by R in file '%s'!" % (scriptFileName))

    os.remove(scriptFileName)
    os.remove(outputRFileName)

    return results
| 271 |
