Mercurial > repos > vandelj > giant_factor_generator
changeset 1:7a520f7169e1 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
author | vandelj |
---|---|
date | Wed, 09 Sep 2020 10:29:24 +0000 |
parents | 4764dc6a1019 |
children | 1f4a30d19264 |
files | galaxy/wrappers/FactorFileGenerator.xml src/General_functions.py |
diffstat | 2 files changed, 159 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/galaxy/wrappers/FactorFileGenerator.xml Fri Jun 26 09:51:15 2020 -0400 +++ b/galaxy/wrappers/FactorFileGenerator.xml Wed Sep 09 10:29:24 2020 +0000 @@ -1,8 +1,9 @@ -<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.2"> +<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.3"> <description>Generate factor file used by other GIANT tools</description> <requirements> </requirements> <code file="../../src/General_functions.py"/> + <!--<code file="./src/General_functions.py"/> change for Planemo test--> <stdio> <regex match="Execution halted" source="both" @@ -18,8 +19,41 @@ #import imp #set $general_functions=$imp.load_source('General_functions', $__tool_directory__+'/../../src/General_functions.py') + ##change for Planemo test + ##set $general_functions=$imp.load_source('General_functions', $__tool_directory__+'/src/General_functions.py') - #set $ret_code=$general_functions.generateFactorFile($inputCondition['inputData'],$factorsSection['factorList'],$outputData.file_name,$log.file_name) + #if $inputCondition.selection == "CELcollection" and $inputCondition.selectCollectionStrat.how=="group_tags": + #set $temp_factor_names = list() + #for $factor in $inputCondition.selectCollectionStrat.factorListBis: + #set $temp_factor = list() + #for $level in $factor.valueList: + #set $temp_level = '' + #for $group in $level.valueConditions.value: + #for $file in $inputCondition.inputData.get_datasets_for_group($group): + printf "$group\t $file.element_identifier\n" >> ./TAGmatching.csv; + #if $temp_level == '': + #set $temp_level = str($file.element_identifier) + #else: + #set $temp_level += ','+str($file.element_identifier) + #end if + #end for + #end for + $temp_factor.append( {'valueName':str($level.valueName), 'valueConditions':$temp_level} ) + #end for + $temp_factor.reverse() + $temp_factor_names.append( {'factorName':str($factor.factorName), 'valueList':$temp_factor} ) + #end for + #end if + + #if $inputCondition.selection == "CELcollection": + #if $inputCondition.selectCollectionStrat.how=="group_tags": + #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$temp_factor_names,$outputData.file_name,$log.file_name) + #else: + #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$inputCondition.selectCollectionStrat.factorListBis,$outputData.file_name,$log.file_name) + #end if + #else: + #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData,$inputCondition.factorList,$outputData.file_name,$log.file_name) + #end if if [ $ret_code != 0 ]; then printf "[ERROR]Error during factor file generation\n" >> $log; @@ -35,56 +69,120 @@ <conditional name="inputCondition"> <param name="selection" type="select" label="Input data type for sample names" force_select="true"> <option value="normalizedData">Expression tabular file</option> - <option value="CELcollection">.CEL files</option> + <option value="CELfiles">.CEL files</option> + <option value="CELcollection">.CEL file collection</option> </param> <when value="normalizedData"> - <param type="data" name="inputData" format="tabular" label="Select file" optional="false" multiple="false"/> + <param type="data" name="inputData" format="tabular" label="Select a single dataset" optional="false" multiple="false"/> + + <repeat name="factorList" title="Factor"> + <param type="text" name="factorName" value="" label="Factor name"/> + <repeat name="valueList" title="Value"> + <param type="text" name="valueName" value="" label="Value name"/> + <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value" + refresh_on_change="true" dynamic_options="get_condition_file_names(inputCondition['inputData'],0)"> + </param> + </repeat> + </repeat> + + </when> + <when value="CELfiles"> + <param type="data" name="inputData" format="cel" label="Select multiple files" optional="false" multiple="true"> + <validator type="empty_field" message="At least two data files should be selected"></validator> + </param> + + <repeat name="factorList" title="Factor"> + <param type="text" name="factorName" value="" label="Factor name"/> + <repeat name="valueList" title="Value"> + <param type="text" name="valueName" value="" label="Value name"/> + <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value" + refresh_on_change="true" dynamic_options="get_condition_file_names(inputCondition['inputData'])"> + </param> + </repeat> + </repeat> + </when> <when value="CELcollection"> - <param type="data" name="inputData" format="cel" label="Select files" optional="false" multiple="true"> - <validator type="empty_dataset" message="At least one data file should be selected"></validator> + <param type="data_collection" name="inputData" format="cel" label="Select a single dataset collection" optional="false" multiple="true"> + <validator type="empty_field" message="One data collection should be selected"></validator> </param> + + <conditional name="selectCollectionStrat"> + <param name="how" type="select" label="Select how .CEL files will be assigned to factor values"> + <option value="individualSelection">Select individually each .CEL file from the collection</option> + <option value="group_tags">Use associated .CEL file tags</option> + </param> + <when value="individualSelection"> + + <repeat name="factorListBis" title="Factor"> + <param type="text" name="factorName" value="" label="Factor name"/> + <repeat name="valueList" title="Value"> + <param type="text" name="valueName" value="" label="Value name"/> + <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value" + refresh_on_change="true" dynamic_options="get_condition_file_names(inputCondition['inputData'])"> + </param> + </repeat> + </repeat> + + </when> + <when value="group_tags"> + + <repeat name="factorListBis" title="Factor"> + <param type="text" name="factorName" value="" label="Factor name"/> + <repeat name="valueList" title="Value"> + <param type="text" name="valueName" value="" label="Value name"/> + <param name="valueConditions" type="group_tag" data_ref="inputData" multiple="true" label="Select groups sharing this value"/> + </repeat> + </repeat> + + </when> + </conditional> </when> </conditional> - - <section name="factorsSection" title="Factor definition" expanded="True"> - <repeat name="factorList" title="Factor"> - <param type="text" name="factorName" value="" label="Factor name"/> - <repeat name="valueList" title="Value"> - <param type="text" name="valueName" value="" label="Value name"/> - <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value" - refresh_on_change="true" dynamic_options="get_condition_file_names(inputCondition['inputData'])"> - </param> - </repeat> - </repeat> - </section> - </inputs> <outputs> <data format="tabular" name="outputData" label="${title}_conditionsFile"/> - <data format="txt" name="log" label="${title}_Log" /> </outputs> + + <tests> <test maxseconds="3600"> - <param name="wfile" value="wiggle.wig" /> - <param name="bfile" value="bedfile.bed" /> - <param name="span" value="3000" /> - <param name="pfres" value="50" /> - <param name="lowersize" value="1000" /> - <param name="middlesize" value="2000" /> - <param name="uppersize" value="3000" /> - <param name="lowerbisize" value="2500" /> - <param name="upperbisize" value="5000" /> - <param name="reldist" value="3000" /> - <param name="genome" value="hg18" /> - <param name="imagetype" value="PDF" /> - <param name="enable" value="no" /> - <output name="outputData" file="ceas_1/ceas_1.pdf" /> + <conditional name="inputCondition"> + <param name="selection" value="normalizedData" /> + <param name="inputData" value="./NormalizedData.tabular" /> + <repeat name="factorList"> + <param name="factorName" value="Strain" /> + <repeat name="valueList"> + <param name="valueName" value="WT" /> + <param name="valueConditions" value="GSM205769.CEL,GSM205772.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL,GSM205771.CEL,GSM205770.CEL"/> + </repeat> + <repeat name="valueList"> + <param name="valueName" value="KO" /> + <param name="valueConditions" value="GSM205777.CEL,GSM205776.CEL,GSM205781.CEL,GSM205773.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205775.CEL,GSM205774.CEL,GSM205778.CEL"/> + </repeat> + </repeat> + <repeat name="factorList"> + <param name="factorName" value="Treatment" /> + <repeat name="valueList"> + <param name="valueName" value="Control" /> + <param name="valueConditions" value="GSM205777.CEL,GSM205776.CEL,GSM205773.CEL,GSM205775.CEL,GSM205774.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL"/> + </repeat> + <repeat name="valueList"> + <param name="valueName" value="Treat" /> + <param name="valueConditions" value="GSM205781.CEL,GSM205769.CEL,GSM205772.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205778.CEL,GSM205771.CEL,GSM205770.CEL"/> + </repeat> + </repeat> + </conditional> + <output name="outputData" file="./FactorFileGenerator/output/conditionsFile.csv" /> + <output name="log" file="./FactorFileGenerator/output/outputLog.txt" /> </test> -</tests> + </tests> + + + <help> <![CDATA[ **What it does ?** @@ -111,7 +209,17 @@ OR -- **.CEL files** of your study (you should select multiple .CEL files or unique collection file). +- **.CEL files** of your study (you should select multiple .CEL files). + +OR + +- **.CEL file collection** of your study (you should select a unique collection file). + + \- **Individual selection** of files to associate to factor values. + + or + + \- **Tag selection** to associate samples sharing the same tag to factor values. \- **Factor definition** @@ -119,7 +227,7 @@ - **Value name** of different states for the current factor as 'KO' or 'WT' for 'Strain' factor (please avoid special characters) -- **Select sample** to assign to current value +- **Select sample/tag** to assign to current value ----- @@ -139,7 +247,4 @@ ]]> </help> - <citations> - </citations> - </tool>
--- a/src/General_functions.py Fri Jun 26 09:51:15 2020 -0400 +++ b/src/General_functions.py Wed Sep 09 10:29:24 2020 +0000 @@ -1,5 +1,6 @@ import re import numpy as np +import galaxy.model def get_column_names( file_path, toNotConsider=-1, each=1): options=[] @@ -76,7 +77,7 @@ def get_condition_file_names( file_list, toNotConsider=-1, each=1): options=[] - if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names + if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names inputfile = open(file_list.file_name) firstLine = next(inputfile).strip().split("\t") cpt=0 @@ -88,13 +89,17 @@ if cpt==each: cpt=0 inputfile.close() - else:#if input file is a .cel file list or a collection - if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily - for i, field_component in enumerate( file_list ): - options.append( ( field_component.name, field_component.name, False ) ) - else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object - for i, field_component in enumerate( file_list[0].collection.elements ): - options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) + else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation + if isinstance(file_list,list):#if it is a list, retrieve names easily + for i, field_component in enumerate( file_list ): + options.append( ( field_component.name, field_component.name, False ) ) + else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object + if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily + for i, field_component in enumerate( file_list.elements ): + options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) + else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object + for i, field_component in enumerate( file_list.collection.elements ): + options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) return options def generateFactorFile( file_list, factor_list, outputFileName, logFile): @@ -102,10 +107,7 @@ outputfile = open(outputFileName, 'w') outputLog = open(logFile, 'w') sampleList=[] - if not isinstance(file_list,list): - conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0) - else : - conditionNames=get_condition_file_names(file_list) #.CEL files + conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0) for iSample, sample_component in enumerate (conditionNames): sampleList.append(str(sample_component[1])) outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")