Mercurial > repos > vandelj > giant_aptsummarize
comparison src/LIMMA_options.py @ 0:708f43bda2b6 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
| author | vandelj |
|---|---|
| date | Fri, 26 Jun 2020 09:35:11 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:708f43bda2b6 |
|---|---|
| 1 import re | |
| 2 | |
def get_column_names( file_path, toNotConsider=None, toNotConsiderBis=None):
    """Return option tuples for the header columns of a tab-separated file.

    Only the first line of ``file_path`` is read.  The first column is always
    skipped, and so is any column whose name equals ``toNotConsider`` or
    ``toNotConsiderBis``.

    Returns a list of ``(name, name, False)`` tuples in header order
    (presumably the label/value/selected triples expected by the tool's
    dynamic select lists -- confirm against the caller).  An empty file
    yields an empty list.
    """
    # The context manager guarantees the handle is released even on errors.
    with open(file_path) as inputfile:
        header_line = next(inputfile, None)
    if header_line is None:
        return []
    # rstrip() rather than strip(): a header starting with a tab has an empty
    # first cell, and dropping that tab would shift every column to the left
    # so the first real column would be skipped by mistake.
    header = header_line.rstrip().split("\t")
    excluded = (toNotConsider, toNotConsiderBis)
    return [(name, name, False) for name in header[1:] if name not in excluded]
| 12 | |
def get_row_names( file_path, factorName ):
    """Return option tuples for the distinct values of one factor column.

    The first line of the tab-separated ``file_path`` is the header; the
    column whose name equals ``factorName`` is looked up there (when the name
    occurs several times the last occurrence is used).  Every following line
    with more than one field contributes its value in that column.

    Returns a list of ``(value, value, False)`` tuples, one per distinct
    value, in order of first appearance.  Returns an empty list when the file
    is empty or the factor is not present in the header.
    """
    with open(file_path) as inputfile:
        header_line = next(inputfile, None)
        if header_line is None:
            return []
        # rstrip() keeps a leading empty header cell so indices stay aligned
        # with the data rows.
        header = header_line.rstrip().split("\t")
        column = -1
        for index, name in enumerate(header):
            if name == factorName:
                column = index
        if column == -1:
            return []
        seen = set()
        options = []
        for line in inputfile:
            fields = line.rstrip().split("\t")
            # Skip blank/one-field lines and rows too short to hold the
            # requested column instead of failing with IndexError.
            if len(fields) <= 1 or column >= len(fields):
                continue
            value = fields[column]
            if value not in seen:
                seen.add(value)
                options.append((value, value, False))
    return options
| 29 | |
def get_row_names_interaction( file_path, factorNameA, factorNameB ):
    """Return option tuples for every "A*B" combination of two factors.

    The header (first line) of the tab-separated ``file_path`` is searched
    for ``factorNameA`` and ``factorNameB`` (last occurrence wins).  The
    distinct values of both columns are collected in order of first
    appearance from every following line that has more than one field.

    Returns a list of ``(label, label, False)`` tuples where ``label`` is
    ``valueA + "*" + valueB``, iterating the values of A in the outer loop
    and the values of B in the inner loop.  Returns an empty list when the
    file is empty, when either factor is missing from the header, when either
    factor has no value, or when the first value of either factor is the
    string ``"None"``.
    """
    with open(file_path) as inputfile:
        header_line = next(inputfile, None)
        if header_line is None:
            return []
        # rstrip() keeps a leading empty header cell so indices stay aligned
        # with the data rows.
        header = header_line.rstrip().split("\t")
        column_a = -1
        column_b = -1
        for index, name in enumerate(header):
            if name == factorNameA:
                column_a = index
            if name == factorNameB:
                column_b = index
        if column_a == -1 or column_b == -1:
            return []
        values_a = []
        values_b = []
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for column, values in ((column_a, values_a), (column_b, values_b)):
                # Rows too short for a column are ignored for that column
                # instead of raising IndexError.
                if column < len(fields) and fields[column] not in values:
                    values.append(fields[column])
    if not values_a or not values_b:
        return []
    if values_a[0] == "None" or values_b[0] == "None":
        return []
    options = []
    for value_a in values_a:
        for value_b in values_b:
            label = value_a + "*" + value_b
            options.append((label, label, False))
    return options
| 57 | |
def get_comparisonsA( factorA, valuesA ):
    """Return option tuples for all pairwise contrasts between values.

    ``valuesA`` is converted with ``str()`` and parsed back into individual
    values, so it may be a single string or a list of strings.  Both the
    Python 2 rendering (``[u'a', u'b']``) and the Python 3 rendering
    (``['a', 'b']``) of a list are understood.  ``factorA`` is accepted for
    interface compatibility and is not used.

    Returns a list of ``(label, label, False)`` tuples.  For every unordered
    pair of values both directions are emitted, ``"x - y"`` followed by
    ``"y - x"``.  Fewer than two values yield an empty list.
    """
    # Strip the list brackets/quotes of the repr, then split on the
    # quote-comma-quote separator; the optional "u" covers Python 2 unicode.
    flattened = re.sub(r"(^\[u?')|('\]$)", "", str(valuesA))
    possible_values = re.split(r"', u?'", flattened)
    options = []
    for index, first in enumerate(possible_values):
        for second in possible_values[index + 1:]:
            for label in (first + " - " + second, second + " - " + first):
                options.append((label, label, False))
    return options
| 68 | |
def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):
    """Return option tuples for contrasts over two factors.

    ``valuesA`` and ``valuesB`` are converted with ``str()`` and parsed back
    into individual values; both the Python 2 (``[u'a', u'b']``) and the
    Python 3 (``['a', 'b']``) rendering of a list are understood.
    ``factorA`` and ``factorB`` are accepted for interface compatibility and
    are not used.

    When ``str(interaction) == "False"`` the result lists, first for A and
    then for B, every pairwise contrast in both directions (``"x - y"`` then
    ``"y - x"``).

    Otherwise the result lists ``"(a * b) - (a' * b')"`` for every pair of
    distinct (A value, B value) combinations, unless the first value of
    either factor is the string ``"None"``, in which case nothing is
    returned.

    Returns a list of ``(label, label, False)`` tuples.
    """
    # Strip the list brackets/quotes of the repr, then split on the
    # quote-comma-quote separator; the optional "u" covers Python 2 unicode.
    values_a = re.split(r"', u?'", re.sub(r"(^\[u?')|('\]$)", "", str(valuesA)))
    values_b = re.split(r"', u?'", re.sub(r"(^\[u?')|('\]$)", "", str(valuesB)))
    options = []

    if str(interaction) == "False":
        # Main effects only: contrasts inside A, then contrasts inside B.
        for values in (values_a, values_b):
            for index, first in enumerate(values):
                for second in values[index + 1:]:
                    for label in (first + " - " + second, second + " - " + first):
                        options.append((label, label, False))
        return options

    if values_a[0] == "None" or values_b[0] == "None":
        return options

    # Interaction: contrast every (A, B) cell against every other cell.
    for index_a, left_a in enumerate(values_a):
        for inner_a, right_a in enumerate(values_a):
            for index_b, left_b in enumerate(values_b):
                for inner_b, right_b in enumerate(values_b):
                    if index_a == inner_a and index_b == inner_b:
                        continue  # a cell is never contrasted with itself
                    label = ("(" + left_a + " * " + left_b + ") - ("
                             + right_a + " * " + right_b + ")")
                    options.append((label, label, False))
    return options
| 95 | |
def get_row_names_allInteractions( file_path, factorSelected):
    """Return option tuples for every combination of the selected factors.

    ``factorSelected`` is converted with ``str()`` and parsed back into
    factor names; a single string as well as the Python 2
    (``[u'a', u'b']``) and Python 3 (``['a', 'b']``) rendering of a list are
    understood.  Each factor is located in the header (first line) of the
    tab-separated ``file_path`` (last occurrence wins) and its distinct
    values are collected, in order of first appearance, from every following
    line that has more than one field.

    Returns a list of ``(label, label, False)`` tuples where ``label`` joins
    one value per factor with ``"*"`` in the order of ``factorSelected``.
    For each additional factor the new values vary slowest and the already
    built combinations vary fastest.  Returns an empty list when the file is
    empty or when any selected factor is absent from the header.
    """
    # Strip the list brackets/quotes of the repr, then split on the
    # quote-comma-quote separator; the optional "u" covers Python 2 unicode.
    flattened = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
    factors = re.split(r"', u?'", flattened)

    with open(file_path) as inputfile:
        header_line = next(inputfile, None)
        if header_line is None:
            return []
        # rstrip() keeps a leading empty header cell so indices stay aligned
        # with the data rows.
        header = header_line.rstrip().split("\t")
        positions = {name: index for index, name in enumerate(header)}
        # An unknown factor has no column to read; give up cleanly instead
        # of indexing rows with None.
        if any(factor not in positions for factor in factors):
            return []
        columns = [positions[factor] for factor in factors]
        values_per_factor = [[] for _ in factors]
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for column, values in zip(columns, values_per_factor):
                # Rows too short for a column are ignored for that column.
                if column < len(fields) and fields[column] not in values:
                    values.append(fields[column])

    combinations = values_per_factor[0]
    for values in values_per_factor[1:]:
        combinations = [
            combination + "*" + value
            for value in values
            for combination in combinations
        ]
    return [(combination, combination, False) for combination in combinations]
| 134 | |
def get_allrow_names( file_path, factorSelected ):
    """Return option tuples "factor:value" for all selected factors.

    ``factorSelected`` is converted with ``str()`` and parsed back into
    factor names; a single string as well as the Python 2
    (``[u'a', u'b']``) and Python 3 (``['a', 'b']``) rendering of a list are
    understood.  Each factor is located in the header (first line) of the
    tab-separated ``file_path`` (last occurrence wins) and its distinct
    values are collected, in order of first appearance, from every following
    line that has more than one field.

    Returns a list of ``(label, label, False)`` tuples where ``label`` is
    ``factor + ":" + value``, grouped by factor in the order of
    ``factorSelected``.  Returns an empty list when the file is empty or when
    any selected factor is absent from the header.
    """
    # Strip the list brackets/quotes of the repr, then split on the
    # quote-comma-quote separator; the optional "u" covers Python 2 unicode.
    flattened = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
    factors = re.split(r"', u?'", flattened)

    with open(file_path) as inputfile:
        header_line = next(inputfile, None)
        if header_line is None:
            return []
        # rstrip() keeps a leading empty header cell so indices stay aligned
        # with the data rows.
        header = header_line.rstrip().split("\t")
        positions = {name: index for index, name in enumerate(header)}
        # An unknown factor has no column to read; give up cleanly instead
        # of indexing rows with None.
        if any(factor not in positions for factor in factors):
            return []
        columns = [positions[factor] for factor in factors]
        values_per_factor = [[] for _ in factors]
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for column, values in zip(columns, values_per_factor):
                # Rows too short for a column are ignored for that column.
                if column < len(fields) and fields[column] not in values:
                    values.append(fields[column])

    options = []
    for factor, values in zip(factors, values_per_factor):
        for value in values:
            label = factor + ":" + value
            options.append((label, label, False))
    return options
| 167 | |
def _register_alias(dico, counters, original, kind):
    """Record and return a new alias ``kind + running number`` for ``original``."""
    alias = kind + str(counters[kind])
    counters[kind] += 1
    dico[original] = alias
    return alias


def _rename_expression_header(expression_path, output_path, dico, counters):
    """Copy the expression file, replacing header names by ``ConditionN``.

    The first header cell is kept as is; all lines after the header are
    copied unchanged.  Raises ``NameError`` when a header name is repeated.
    """
    with open(expression_path) as source, open(output_path, 'w') as target:
        header = next(source).rstrip().split("\t")
        renamed = [header[0]]
        for name in header[1:]:
            # condition names should not be redundant with other conditions
            if name in dico:
                raise NameError('condition name allready exists!')
            renamed.append(_register_alias(dico, counters, name, "Condition"))
        target.write("\t".join(renamed) + "\n")
        target.writelines(source)


def _rename_design_file(design_path, output_path, dico, counters):
    """Rewrite a condition/blocking table using generic aliases.

    Header names after the first cell become ``FactorN`` (``NameError`` when
    a name is already known).  On the other lines the first cell becomes the
    alias already stored for it, or a new ``ConditionN``; every other cell
    becomes the alias already stored for it, or a new ``ValueN``.
    """
    with open(design_path) as source, open(output_path, 'w') as target:
        for line_number, line in enumerate(source):
            fields = line.rstrip().split("\t")
            if line_number == 0:
                renamed = [fields[0]]
                for name in fields[1:]:
                    # factor names should not be redundant with other
                    # factors or conditions
                    if name in dico:
                        raise NameError('factor name allready exists!')
                    renamed.append(_register_alias(dico, counters, name, "Factor"))
            else:
                renamed = []
                for position, cell in enumerate(fields):
                    if cell in dico:
                        renamed.append(dico[cell])
                    else:
                        kind = "Condition" if position == 0 else "Value"
                        renamed.append(_register_alias(dico, counters, cell, kind))
            target.write("\t".join(renamed) + "\n")


def _write_dictionary(dico, dictionary_path):
    """Write ``original<TAB>alias`` lines; return 0 on success, 1 if refused.

    Nothing is written and 1 is returned when an original name whose alias
    does not start with ``"Condition"`` contains one of ``* : , |``.
    """
    forbidden_characters = ("*", ":", ",", "|")
    for original, alias in dico.items():
        if alias.startswith("Condition"):
            continue
        if any(character in original for character in forbidden_characters):
            return 1
    with open(dictionary_path, 'w') as target:
        for original, alias in dico.items():
            target.write(original + "\t" + alias + "\n")
    return 0


def replaceNamesInFiles(expressionFile_name,conditionFile_name,outputExpressionFile,outputConditionFile,ouputDictionnary):
    """Replace user-supplied names by generic aliases in two input files.

    * The expression file is copied to ``outputExpressionFile`` with every
      header name after the first replaced by ``Condition1``, ``Condition2``...
    * The condition file is rewritten to ``outputConditionFile`` with header
      names replaced by ``FactorN``, first-column entries replaced by their
      ``ConditionN`` alias (a new one is created when unknown) and all other
      cells replaced by ``ValueN`` aliases.
    * The mapping ``original<TAB>alias`` is written to ``ouputDictionnary``.

    Returns 0 on success.  Returns 1, without writing the dictionary file
    (the two other output files have already been written at that point),
    when a factor or value name contains one of ``* : , |``.

    Raises ``NameError`` when a condition name is repeated in the expression
    header or when a factor name is already in use.  An empty expression file
    raises ``StopIteration``, as before.  All files are closed even when an
    exception is raised.
    """
    dico = {}
    counters = {"Condition": 1, "Factor": 1, "Value": 1}
    _rename_expression_header(expressionFile_name, outputExpressionFile, dico, counters)
    _rename_design_file(conditionFile_name, outputConditionFile, dico, counters)
    return _write_dictionary(dico, ouputDictionnary)


def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):
    """Same as ``replaceNamesInFiles`` with an additional blocking file.

    The blocking file is processed after the condition file with the same
    rules and is written to ``outputBlockingFile``.  Aliases and counters are
    shared across all files, so a name receives the same alias everywhere
    and a factor name of the blocking file must not already be in use.

    Returns 0 on success, or 1 (dictionary file not written) when a factor or
    value name contains one of ``* : , |``.  Raises ``NameError`` on
    redundant condition or factor names.
    """
    dico = {}
    counters = {"Condition": 1, "Factor": 1, "Value": 1}
    _rename_expression_header(expressionFile_name, outputExpressionFile, dico, counters)
    _rename_design_file(conditionFile_name, outputConditionFile, dico, counters)
    _rename_design_file(blockingFile_name, outputBlockingFile, dico, counters)
    return _write_dictionary(dico, ouputDictionnary)
