comparison src/General_functions.py @ 1:0435f94d27a7 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
author vandelj
date Wed, 09 Sep 2020 10:28:54 +0000
parents 488e6e8bb8cb
children
comparison
equal deleted inserted replaced
0:488e6e8bb8cb 1:0435f94d27a7
1 import re 1 import re
2 import numpy as np 2 import numpy as np
3 import galaxy.model
3 4
4 def get_column_names( file_path, toNotConsider=-1, each=1): 5 def get_column_names( file_path, toNotConsider=-1, each=1):
5 options=[] 6 options=[]
6 inputfile = open(file_path) 7 inputfile = open(file_path)
7 firstLine = next(inputfile).strip().split("\t") 8 firstLine = next(inputfile).strip().split("\t")
74 inputfile.close() 75 inputfile.close()
75 return options 76 return options
76 77
77 def get_condition_file_names( file_list, toNotConsider=-1, each=1): 78 def get_condition_file_names( file_list, toNotConsider=-1, each=1):
78 options=[] 79 options=[]
79 if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names 80 if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
80 inputfile = open(file_list.file_name) 81 inputfile = open(file_list.file_name)
81 firstLine = next(inputfile).strip().split("\t") 82 firstLine = next(inputfile).strip().split("\t")
82 cpt=0 83 cpt=0
83 for i, field_component in enumerate( firstLine ): 84 for i, field_component in enumerate( firstLine ):
84 if i!=toNotConsider:#to squeeze the first column 85 if i!=toNotConsider:#to squeeze the first column
86 options.append( ( field_component, field_component, False ) ) 87 options.append( ( field_component, field_component, False ) )
87 cpt+=1 88 cpt+=1
88 if cpt==each: 89 if cpt==each:
89 cpt=0 90 cpt=0
90 inputfile.close() 91 inputfile.close()
91 else:#if input file is a .cel file list or a collection 92 else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
92 if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily 93 if isinstance(file_list,list):#if it is a list, retrieve names easily
93 for i, field_component in enumerate( file_list ): 94 for i, field_component in enumerate( file_list ):
94 options.append( ( field_component.name, field_component.name, False ) ) 95 options.append( ( field_component.name, field_component.name, False ) )
95 else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object 96 else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object
96 for i, field_component in enumerate( file_list[0].collection.elements ): 97 if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily
97 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) 98 for i, field_component in enumerate( file_list.elements ):
99 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
100 else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object
101 for i, field_component in enumerate( file_list.collection.elements ):
102 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
98 return options 103 return options
99 104
100 def generateFactorFile( file_list, factor_list, outputFileName, logFile): 105 def generateFactorFile( file_list, factor_list, outputFileName, logFile):
101 forbidenCharacters={"*",":",",","|"} 106 forbidenCharacters={"*",":",",","|"}
102 outputfile = open(outputFileName, 'w') 107 outputfile = open(outputFileName, 'w')
103 outputLog = open(logFile, 'w') 108 outputLog = open(logFile, 'w')
104 sampleList=[] 109 sampleList=[]
105 if not isinstance(file_list,list): 110 conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
106 conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
107 else :
108 conditionNames=get_condition_file_names(file_list) #.CEL files
109 for iSample, sample_component in enumerate (conditionNames): 111 for iSample, sample_component in enumerate (conditionNames):
110 sampleList.append(str(sample_component[1])) 112 sampleList.append(str(sample_component[1]))
111 outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n") 113 outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")
112 globalDict=dict() 114 globalDict=dict()
113 factorNameList=[] 115 factorNameList=[]