Mercurial > repos > vandelj > giant_gsea_format
changeset 1:d72f1bc5ce9e draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
author | vandelj |
---|---|
date | Wed, 09 Sep 2020 10:36:26 +0000 |
parents | 3022feec50fe |
children | 738ed1deed23 |
files | galaxy/wrappers/FormatForGSEA.xml src/General_functions.py |
diffstat | 2 files changed, 35 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/galaxy/wrappers/FormatForGSEA.xml Fri Jun 26 09:36:46 2020 -0400 +++ b/galaxy/wrappers/FormatForGSEA.xml Wed Sep 09 10:36:26 2020 +0000 @@ -1,6 +1,7 @@ <tool name="GIANT-GSEA Formatting" id="giant_gsea_format" version="0.2.0"> <description>Format input files for GSEA software</description> <code file="../../src/General_functions.py"/> + <!--<code file="./src/General_functions.py"/> change for Planemo test--> <stdio> <regex match="Execution halted" source="both" @@ -60,7 +61,7 @@ </param> <param type="data" name="conditionInformation" format="tabular" label="Factor information tabular file" optional="false" multiple="false"> </param> - <param name="factorToInclude" type="select" label="Reference factor" multiple="false" optional="false" refresh_on_change="true" + <param name="factorToInclude" type="select" label="Reference factor" multiple="false" optional="false" refresh_on_change="true" dynamic_options="get_column_names_filteredList(mainCondition['conditionInformation'].file_name,[0])"> </param> </when> @@ -101,31 +102,27 @@ <tests> <test maxseconds="3600" > - <param name="selection" value="classicGSEA" /> - <param name="expressionData" value="./NormalizedData.tabular" /> - <param name="conditionInformation" value="./conditionGroups.txt" /> - <param name="factorToInclude" value="Treatment" /> - <output name="outExpression" file="./GSEA-Formatting/outputExpression.gct" /> - <output name="outPhenotypes" file="./GSEA-Formatting/outputPhenotypesTreatment.cls" /> - <output name="log" file="./GSEA-Formatting/outputRanks.log" /> + <conditional name="mainCondition"> + <param name="selection" value="classicGSEA" /> + <param name="expressionData" value="./NormalizedData.tabular" /> + <param name="conditionInformation" value="./FactorFileGenerator/output/conditionsFile.csv" /> + <param name="factorToInclude" value="Treatment" /> + </conditional> + <output name="outExpression" file="./FormatForGSEA/output/outputExpression_A.gct" /> + <output name="outPhenotypes" file="./FormatForGSEA/output/outputPhenotypes_A.cls" /> + <output name="log" file="./FormatForGSEA/output/outputLog_A.txt" /> </test> + <test maxseconds="3600" > - <param name="selection" value="classicGSEA" /> - <param name="expressionData" value="./NormalizedData.tabular" /> - <param name="conditionInformation" value="./conditionGroups.txt" /> - <param name="factorToInclude" value="Type" /> - <output name="outExpression" file="./GSEA-Formatting/outputExpression.gct" /> - <output name="outPhenotypes" file="./GSEA-Formatting/outputPhenotypesType.cls" /> - <output name="log" file="./GSEA-Formatting/outputRanks.log" /> - </test> - <test maxseconds="3600" > - <param name="selection" value="rankedGSEA" /> - <param name="differentialAnalysis" value="./LIMMAstatistics.tabular" /> - <param name="comparisonsToUse" value="WT*Control-KO*Control" /> - <param name="rankingIndice" value="FC" /> - <param name="pvalThreshold" value="0.05" /> - <output name="outRankedGenes" file="./GSEA-Formatting/outputRanks.rnk" /> - <output name="log" file="./GSEA-Formatting/outputRanks.log" /> + <conditional name="mainCondition"> + <param name="selection" value="rankedGSEA" /> + <param name="differentialAnalysis" value="./DiffExprLimma/output/outputStat.csv" /> + <param name="comparisonsToUse" value="TreatVsControl" /> + <param name="rankingIndice" value="absFC" /> + <param name="pvalThreshold" value="0.05" /> + </conditional> + <output name="outRankedGenes" file="./FormatForGSEA/output/outputRanks_B.rnk" /> + <output name="log" file="./FormatForGSEA/output/outputLog_B.txt" /> </test> </tests> <help>
--- a/src/General_functions.py Fri Jun 26 09:36:46 2020 -0400 +++ b/src/General_functions.py Wed Sep 09 10:36:26 2020 +0000 @@ -1,5 +1,6 @@ import re import numpy as np +import galaxy.model def get_column_names( file_path, toNotConsider=-1, each=1): options=[] @@ -76,7 +77,7 @@ def get_condition_file_names( file_list, toNotConsider=-1, each=1): options=[] - if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names + if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names inputfile = open(file_list.file_name) firstLine = next(inputfile).strip().split("\t") cpt=0 @@ -88,13 +89,17 @@ if cpt==each: cpt=0 inputfile.close() - else:#if input file is a .cel file list or a collection - if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily - for i, field_component in enumerate( file_list ): - options.append( ( field_component.name, field_component.name, False ) ) - else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object - for i, field_component in enumerate( file_list[0].collection.elements ): - options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) + else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation + if isinstance(file_list,list):#if it is a list, retrieve names easily + for i, field_component in enumerate( file_list ): + options.append( ( field_component.name, field_component.name, False ) ) + else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object + if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily + for i, field_component in enumerate( file_list.elements ): + options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) + else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object + for i, field_component in enumerate( file_list.collection.elements ): + options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) return options def generateFactorFile( file_list, factor_list, outputFileName, logFile): @@ -102,10 +107,7 @@ outputfile = open(outputFileName, 'w') outputLog = open(logFile, 'w') sampleList=[] - if not isinstance(file_list,list): - conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0) - else : - conditionNames=get_condition_file_names(file_list) #.CEL files + conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0) for iSample, sample_component in enumerate (conditionNames): sampleList.append(str(sample_component[1])) outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")