Galaxy |

Changeset 1:7a520f7169e1 (2020-09-09)

Previous changeset 0:4764dc6a1019 (2020-06-26) Next changeset 2:1f4a30d19264 (2020-09-25)

Commit message:
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"

modified:
galaxy/wrappers/FactorFileGenerator.xml
src/General_functions.py

diff -r 4764dc6a1019 -r 7a520f7169e1 galaxy/wrappers/FactorFileGenerator.xml
--- a/galaxy/wrappers/FactorFileGenerator.xml Fri Jun 26 09:51:15 2020 -0400
+++ b/galaxy/wrappers/FactorFileGenerator.xml Wed Sep 09 10:29:24 2020 +0000

[

b'@@ -1,8 +1,9 @@\n-<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.2">\n+<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.3">\n <description>Generate factor file used by other GIANT tools</description>\n <requirements>\n </requirements>\n <code file="../../src/General_functions.py"/>\n+ \n <stdio>\n <regex match="Execution halted"\n source="both"\n@@ -18,8 +19,41 @@\n \n #import imp\n #set $general_functions=$imp.load_source(\'General_functions\', $__tool_directory__+\'/../../src/General_functions.py\')\n+ ##change for Planemo test\n+ ##set $general_functions=$imp.load_source(\'General_functions\', $__tool_directory__+\'/src/General_functions.py\')\n \n- #set $ret_code=$general_functions.generateFactorFile($inputCondition[\'inputData\'],$factorsSection[\'factorList\'],$outputData.file_name,$log.file_name)\n+ #if $inputCondition.selection == "CELcollection" and $inputCondition.selectCollectionStrat.how=="group_tags":\n+ #set $temp_factor_names = list()\n+ #for $factor in $inputCondition.selectCollectionStrat.factorListBis:\n+ #set $temp_factor = list()\n+ #for $level in $factor.valueList:\n+ #set $temp_level = \'\'\n+ #for $group in $level.valueConditions.value:\n+ #for $file in $inputCondition.inputData.get_datasets_for_group($group):\n+ printf "$group\\t $file.element_identifier\\n" >> ./TAGmatching.csv;\n+ #if $temp_level == \'\':\n+ #set $temp_level = str($file.element_identifier)\n+ #else:\n+ #set $temp_level += \',\'+str($file.element_identifier)\n+ #end if\n+ #end for\n+ #end for\n+ $temp_factor.append( {\'valueName\':str($level.valueName), \'valueConditions\':$temp_level} )\n+ #end for\n+ $temp_factor.reverse()\n+ $temp_factor_names.append( {\'factorName\':str($factor.factorName), \'valueList\':$temp_factor} )\n+ #end for\n+ #end if\n+\n+ #if $inputCondition.selection == "CELcollection":\n+ #if $inputCondition.selectCollectionStrat.how=="group_tags":\n+ #set 
$ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$temp_factor_names,$outputData.file_name,$log.file_name)\n+ #else:\n+ #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$inputCondition.selectCollectionStrat.factorListBis,$outputData.file_name,$log.file_name)\n+ #end if\n+ #else:\n+ #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData,$inputCondition.factorList,$outputData.file_name,$log.file_name)\n+ #end if\n \n if [ $ret_code != 0 ]; then\n printf "[ERROR]Error during factor file generation\\n" >> $log;\n@@ -35,56 +69,120 @@\n <conditional name="inputCondition">\n <param name="selection" type="select" label="Input data type for sample names" force_select="true">\n <option value="normalizedData">Expression tabular file</option>\n- <option value="CELcollection">.CEL files</option>\n+ <option value="CELfiles">.CEL files</option>\n+ <option value="CELcollection">.CEL file collection</option>\n </param>\n <when value="normalizedData">\n- <param type="data" name="inputData" format="tabular" label="Select file" optional="false" multiple="false"/>\n+ <param type="data" name="inputData" format="tabular" label="Select a single dataset" optional="false" multiple="false"/>\n+\n+ <repeat name="factorList" title="Factor">\n+ <param type="text" name="factorName" value="" label="Factor name"/>\n+ <repeat name="valueList" title="Value">\n+ <param type="text" name="valueName" value="" label="Value name"/>\n+ <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample shar'..b'eat name="factorList" title="Factor">\n- <param type="text" name="factorName" value="" label="Factor name"/>\n- <repeat name="valueList" title="Value">\n- <param type="text" name="valueName" value="" label="Value name"/>\n- <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value"\n- refresh_on_change="true" 
dynamic_options="get_condition_file_names(inputCondition[\'inputData\'])">\n- </param>\n- </repeat>\n- </repeat>\n- </section>\n-\t\n </inputs>\n \n <outputs>\n <data format="tabular" name="outputData" label="${title}_conditionsFile"/>\n-\n <data format="txt" name="log" label="${title}_Log" />\n </outputs>\n \n+\n+\n <tests>\n <test maxseconds="3600">\n- <param name="wfile" value="wiggle.wig" />\n- <param name="bfile" value="bedfile.bed" />\n- <param name="span" value="3000" />\n- <param name="pfres" value="50" />\n- <param name="lowersize" value="1000" />\n- <param name="middlesize" value="2000" />\n- <param name="uppersize" value="3000" />\n- <param name="lowerbisize" value="2500" />\n- <param name="upperbisize" value="5000" />\n- <param name="reldist" value="3000" />\n- <param name="genome" value="hg18" />\n- <param name="imagetype" value="PDF" />\n- <param name="enable" value="no" />\n- <output name="outputData" file="ceas_1/ceas_1.pdf" />\n+ <conditional name="inputCondition">\n+ <param name="selection" value="normalizedData" />\n+ <param name="inputData" value="./NormalizedData.tabular" />\n+ <repeat name="factorList">\n+ <param name="factorName" value="Strain" />\n+ <repeat name="valueList">\n+ <param name="valueName" value="WT" />\n+ <param name="valueConditions" value="GSM205769.CEL,GSM205772.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL,GSM205771.CEL,GSM205770.CEL"/>\n+ </repeat>\n+ <repeat name="valueList">\n+ <param name="valueName" value="KO" />\n+ <param name="valueConditions" value="GSM205777.CEL,GSM205776.CEL,GSM205781.CEL,GSM205773.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205775.CEL,GSM205774.CEL,GSM205778.CEL"/>\n+ </repeat>\n+ </repeat>\n+ <repeat name="factorList">\n+ <param name="factorName" value="Treatment" />\n+ <repeat name="valueList">\n+ <param name="valueName" value="Control" />\n+ <param name="valueConditions" 
value="GSM205777.CEL,GSM205776.CEL,GSM205773.CEL,GSM205775.CEL,GSM205774.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL"/>\n+ </repeat>\n+ <repeat name="valueList">\n+ <param name="valueName" value="Treat" />\n+ <param name="valueConditions" value="GSM205781.CEL,GSM205769.CEL,GSM205772.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205778.CEL,GSM205771.CEL,GSM205770.CEL"/>\n+ </repeat>\n+ </repeat>\n+ </conditional>\n+ <output name="outputData" file="./FactorFileGenerator/output/conditionsFile.csv" />\n+ <output name="log" file="./FactorFileGenerator/output/outputLog.txt" /> \n </test>\n-</tests> \n+ </tests> \n+\n+\n+\n <help>\n <![CDATA[\n **What it does ?**\n@@ -111,7 +209,17 @@\n \n OR\n \n-- **.CEL files** of your study (you should select multiple .CEL files or unique collection file).\n+- **.CEL files** of your study (you should select multiple .CEL files).\n+\n+OR\n+\n+- **.CEL file collection** of your study (you should select a unique collection file).\n+\n+ \\- **Individual selection** of files to associate to factor values.\n+ \n+ or\n+\n+ \\- **Tag selection** to associate samples sharing the same tag to factor values.\n \n \\- **Factor definition**\n \n@@ -119,7 +227,7 @@\n \n - **Value name** of different states for the current factor as \'KO\' or \'WT\' for \'Strain\' factor (please avoid special characters)\n \n-- **Select sample** to assign to current value\n+- **Select sample/tag** to assign to current value\n \n -----\n \n@@ -139,7 +247,4 @@\n \n ]]> </help>\n \n- <citations>\n- </citations>\n-\n </tool>\n'

diff -r 4764dc6a1019 -r 7a520f7169e1 src/General_functions.py
--- a/src/General_functions.py Fri Jun 26 09:51:15 2020 -0400
+++ b/src/General_functions.py Wed Sep 09 10:29:24 2020 +0000

[

@@ -1,5 +1,6 @@
import re
import numpy as np
+import galaxy.model

def get_column_names( file_path, toNotConsider=-1, each=1):
options=[]
@@ -76,7 +77,7 @@

def get_condition_file_names( file_list, toNotConsider=-1, each=1):
options=[]
- if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+ if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
inputfile = open(file_list.file_name)
firstLine = next(inputfile).strip().split("\t")
cpt=0
@@ -88,13 +89,17 @@
if cpt==each:
cpt=0
inputfile.close()
- else:#if input file is a .cel file list or a collection
- if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
- for i, field_component in enumerate( file_list ):
- options.append( ( field_component.name, field_component.name, False ) )
- else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
- for i, field_component in enumerate( file_list[0].collection.elements ):
- options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
+ if isinstance(file_list,list):#if it is a list, retrieve names easily
+ for i, field_component in enumerate( file_list ):
+ options.append( ( field_component.name, field_component.name, False ) )
+ else:#otherwise the input is a DatasetCollection or a HistoryDatasetCollectionAssociation, names come from the collection elements
+ if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a DatasetCollection, read the element identifiers directly from its elements
+ for i, field_component in enumerate( file_list.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if it is a HistoryDatasetCollectionAssociation, go through its .collection attribute to reach the elements
+ for i, field_component in enumerate( file_list.collection.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
return options

def generateFactorFile( file_list, factor_list, outputFileName, logFile):
@@ -102,10 +107,7 @@
outputfile = open(outputFileName, 'w')
outputLog = open(logFile, 'w')
sampleList=[]
- if not isinstance(file_list,list):
- conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
- else :
- conditionNames=get_condition_file_names(file_list) #.CEL files
+ conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
for iSample, sample_component in enumerate (conditionNames):
sampleList.append(str(sample_component[1]))
outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")