Galaxy |

Changeset 1:d72f1bc5ce9e (2020-09-09)

Previous changeset 0:3022feec50fe (2020-06-26) Next changeset 2:738ed1deed23 (2020-09-25)

Commit message:
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"

modified:
galaxy/wrappers/FormatForGSEA.xml
src/General_functions.py

diff -r 3022feec50fe -r d72f1bc5ce9e galaxy/wrappers/FormatForGSEA.xml
--- a/galaxy/wrappers/FormatForGSEA.xml Fri Jun 26 09:36:46 2020 -0400
+++ b/galaxy/wrappers/FormatForGSEA.xml Wed Sep 09 10:36:26 2020 +0000

[

@@ -1,6 +1,7 @@
<tool name="GIANT-GSEA Formatting" id="giant_gsea_format" version="0.2.0">
   <description>Format input files for GSEA software</description>
   <code file="../../src/General_functions.py"/>
+  
   <stdio>
     <regex match="Execution halted"
            source="both"
@@ -60,7 +61,7 @@
         </param>
         <param type="data" name="conditionInformation" format="tabular" label="Factor information tabular file" optional="false" multiple="false">
         </param>
-            <param name="factorToInclude" type="select" label="Reference factor" multiple="false" optional="false"  refresh_on_change="true"
+        <param name="factorToInclude" type="select" label="Reference factor" multiple="false" optional="false"  refresh_on_change="true"
               dynamic_options="get_column_names_filteredList(mainCondition['conditionInformation'].file_name,[0])">
         </param>
       </when>
@@ -101,31 +102,27 @@

  <tests>
   <test maxseconds="3600" >
-    <param name="selection" value="classicGSEA" />
-    <param name="expressionData" value="./NormalizedData.tabular" />
-    <param name="conditionInformation" value="./conditionGroups.txt" />
-    <param name="factorToInclude" value="Treatment" />
-    <output name="outExpression" file="./GSEA-Formatting/outputExpression.gct" />
-    <output name="outPhenotypes" file="./GSEA-Formatting/outputPhenotypesTreatment.cls" />
-    <output name="log" file="./GSEA-Formatting/outputRanks.log" />
+    <conditional name="mainCondition">
+      <param name="selection" value="classicGSEA" />
+      <param name="expressionData" value="./NormalizedData.tabular" />
+      <param name="conditionInformation" value="./FactorFileGenerator/output/conditionsFile.csv" />
+      <param name="factorToInclude" value="Treatment" />
+    </conditional>
+    <output name="outExpression" file="./FormatForGSEA/output/outputExpression_A.gct" />
+    <output name="outPhenotypes" file="./FormatForGSEA/output/outputPhenotypes_A.cls" />
+    <output name="log" file="./FormatForGSEA/output/outputLog_A.txt" />
   </test>
+
   <test maxseconds="3600" >
-    <param name="selection" value="classicGSEA" />
-    <param name="expressionData" value="./NormalizedData.tabular" />
-    <param name="conditionInformation" value="./conditionGroups.txt" />
-    <param name="factorToInclude" value="Type" />
-    <output name="outExpression" file="./GSEA-Formatting/outputExpression.gct" />
-    <output name="outPhenotypes" file="./GSEA-Formatting/outputPhenotypesType.cls" />
-    <output name="log" file="./GSEA-Formatting/outputRanks.log" />
-  </test>
-  <test maxseconds="3600" >
-    <param name="selection" value="rankedGSEA" />
-    <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" />
-    <param name="comparisonsToUse" value="WT*Control-KO*Control" />
-    <param name="rankingIndice" value="FC" />
-    <param name="pvalThreshold" value="0.05" />
-    <output name="outRankedGenes" file="./GSEA-Formatting/outputRanks.rnk" />
-    <output name="log" file="./GSEA-Formatting/outputRanks.log" />
+    <conditional name="mainCondition">
+      <param name="selection" value="rankedGSEA" />
+      <param name="differentialAnalysis" value="./DiffExprLimma/output/outputStat.csv" />
+      <param name="comparisonsToUse" value="TreatVsControl" />
+      <param name="rankingIndice" value="absFC" />
+      <param name="pvalThreshold" value="0.05" />
+    </conditional>
+    <output name="outRankedGenes" file="./FormatForGSEA/output/outputRanks_B.rnk" />
+    <output name="log" file="./FormatForGSEA/output/outputLog_B.txt" />
   </test>
</tests>
   <help>

diff -r 3022feec50fe -r d72f1bc5ce9e src/General_functions.py
--- a/src/General_functions.py Fri Jun 26 09:36:46 2020 -0400
+++ b/src/General_functions.py Wed Sep 09 10:36:26 2020 +0000

[

@@ -1,5 +1,6 @@
import re
import numpy as np
+import galaxy.model

def get_column_names( file_path, toNotConsider=-1, each=1):
options=[]
@@ -76,7 +77,7 @@

def get_condition_file_names( file_list, toNotConsider=-1, each=1):
options=[]
- if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+ if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
inputfile = open(file_list.file_name)
firstLine = next(inputfile).strip().split("\t")
cpt=0
@@ -88,13 +89,17 @@
if cpt==each:
cpt=0
inputfile.close()
- else:#if input file is a .cel file list or a collection
- if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
- for i, field_component in enumerate( file_list ):
- options.append( ( field_component.name, field_component.name, False ) )
- else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
- for i, field_component in enumerate( file_list[0].collection.elements ):
- options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
+ if isinstance(file_list,list):#if it is a list, retrieve names easily
+ for i, field_component in enumerate( file_list ):
+ options.append( ( field_component.name, field_component.name, False ) )
+ else:#otherwise the input is a DatasetCollection or a HistoryDatasetCollectionAssociation, so names come from the collection elements
+ if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a DatasetCollection, read the identifiers directly from its elements
+ for i, field_component in enumerate( file_list.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object
+ for i, field_component in enumerate( file_list.collection.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
return options

def generateFactorFile( file_list, factor_list, outputFileName, logFile):
@@ -102,10 +107,7 @@
outputfile = open(outputFileName, 'w')
outputLog = open(logFile, 'w')
sampleList=[]
- if not isinstance(file_list,list):
- conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
- else :
- conditionNames=get_condition_file_names(file_list) #.CEL files
+ conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
for iSample, sample_component in enumerate (conditionNames):
sampleList.append(str(sample_component[1]))
outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")