Repository 'giant_factor_generator'
hg clone https://toolshed.g2.bx.psu.edu/repos/vandelj/giant_factor_generator

Changeset 1:7a520f7169e1 (2020-09-09)
Previous changeset 0:4764dc6a1019 (2020-06-26) Next changeset 2:1f4a30d19264 (2020-09-25)
Commit message:
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
modified:
galaxy/wrappers/FactorFileGenerator.xml
src/General_functions.py
b
diff -r 4764dc6a1019 -r 7a520f7169e1 galaxy/wrappers/FactorFileGenerator.xml
--- a/galaxy/wrappers/FactorFileGenerator.xml Fri Jun 26 09:51:15 2020 -0400
+++ b/galaxy/wrappers/FactorFileGenerator.xml Wed Sep 09 10:29:24 2020 +0000
[
b'@@ -1,8 +1,9 @@\n-<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.2">\n+<tool name="GIANT-Factor file generator" id="giant_factor_generator" version="0.1.3">\n   <description>Generate factor file used by other GIANT tools</description>\n   <requirements>\n   </requirements>\n   <code file="../../src/General_functions.py"/>\n+  <!--<code file="./src/General_functions.py"/> change for Planemo test-->\n   <stdio>\n     <regex match="Execution halted"\n            source="both"\n@@ -18,8 +19,41 @@\n \n   #import imp\n   #set $general_functions=$imp.load_source(\'General_functions\', $__tool_directory__+\'/../../src/General_functions.py\')\n+  ##change for Planemo test\n+  ##set $general_functions=$imp.load_source(\'General_functions\', $__tool_directory__+\'/src/General_functions.py\')\n \n-  #set $ret_code=$general_functions.generateFactorFile($inputCondition[\'inputData\'],$factorsSection[\'factorList\'],$outputData.file_name,$log.file_name)\n+  #if $inputCondition.selection == "CELcollection" and $inputCondition.selectCollectionStrat.how=="group_tags":\n+    #set $temp_factor_names = list()\n+    #for $factor in $inputCondition.selectCollectionStrat.factorListBis:\n+          #set $temp_factor = list()\n+          #for $level in $factor.valueList:\n+            #set $temp_level = \'\'\n+            #for $group in $level.valueConditions.value:\n+                #for $file in $inputCondition.inputData.get_datasets_for_group($group):\n+                    printf "$group\\t $file.element_identifier\\n" >> ./TAGmatching.csv;\n+                    #if $temp_level == \'\':\n+                      #set $temp_level = str($file.element_identifier)\n+                    #else:\n+                      #set $temp_level += \',\'+str($file.element_identifier)\n+                    #end if\n+                #end for\n+            #end for\n+            $temp_factor.append( {\'valueName\':str($level.valueName), \'valueConditions\':$temp_level} )\n+          #end for\n+          $temp_factor.reverse()\n+          $temp_factor_names.append( {\'factorName\':str($factor.factorName), \'valueList\':$temp_factor} )\n+    #end for\n+  #end if\n+\n+  #if $inputCondition.selection == "CELcollection":\n+    #if $inputCondition.selectCollectionStrat.how=="group_tags":\n+      #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$temp_factor_names,$outputData.file_name,$log.file_name)\n+    #else:\n+      #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData.collection,$inputCondition.selectCollectionStrat.factorListBis,$outputData.file_name,$log.file_name)\n+    #end if\n+  #else:\n+    #set $ret_code=$general_functions.generateFactorFile($inputCondition.inputData,$inputCondition.factorList,$outputData.file_name,$log.file_name)\n+  #end if\n \n   if [ $ret_code != 0 ]; then\n     printf "[ERROR]Error during factor file generation\\n" >> $log;\n@@ -35,56 +69,120 @@\n   <conditional name="inputCondition">\n       <param name="selection" type="select" label="Input data type for sample names" force_select="true">\n         <option value="normalizedData">Expression tabular file</option>\n-        <option value="CELcollection">.CEL files</option>\n+        <option value="CELfiles">.CEL files</option>\n+        <option value="CELcollection">.CEL file collection</option>\n       </param>\n       <when value="normalizedData">\n-        <param type="data" name="inputData" format="tabular" label="Select file" optional="false" multiple="false"/>\n+        <param type="data" name="inputData" format="tabular" label="Select a single dataset" optional="false" multiple="false"/>\n+\n+            <repeat name="factorList" title="Factor">\n+              <param type="text" name="factorName" value="" label="Factor name"/>\n+              <repeat name="valueList" title="Value">\n+                <param type="text" name="valueName" value="" label="Value name"/>\n+                <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample shar'..b'eat name="factorList" title="Factor">\n-        <param type="text" name="factorName" value="" label="Factor name"/>\n-        <repeat name="valueList" title="Value">\n-          <param type="text" name="valueName" value="" label="Value name"/>\n-          <param name="valueConditions" type="select" optional="false" multiple="true" label="Select sample sharing this value"\n-            refresh_on_change="true"  dynamic_options="get_condition_file_names(inputCondition[\'inputData\'])">\n-          </param>\n-        </repeat>\n-    </repeat>\n-  </section>\n-\t\n   </inputs>\n \n   <outputs>\n     <data format="tabular" name="outputData" label="${title}_conditionsFile"/>\n-\n     <data format="txt" name="log" label="${title}_Log" />\n   </outputs>\n   \n+\n+\n  <tests>\n   <test maxseconds="3600">\n-    <param name="wfile" value="wiggle.wig" />\n-    <param name="bfile" value="bedfile.bed" />\n-    <param name="span" value="3000" />\n-    <param name="pfres" value="50" />\n-    <param name="lowersize" value="1000" />\n-    <param name="middlesize" value="2000" />\n-    <param name="uppersize" value="3000" />\n-    <param name="lowerbisize" value="2500" />\n-    <param name="upperbisize" value="5000" />\n-    <param name="reldist" value="3000" />\n-    <param name="genome" value="hg18" />\n-    <param name="imagetype" value="PDF" />\n-    <param name="enable" value="no" />\n-    <output name="outputData" file="ceas_1/ceas_1.pdf" />\n+    <conditional name="inputCondition">\n+      <param name="selection" value="normalizedData" />\n+      <param name="inputData" value="./NormalizedData.tabular" />\n+      <repeat name="factorList">\n+        <param name="factorName" value="Strain" />\n+        <repeat name="valueList">\n+          <param name="valueName" value="WT" />\n+          <param name="valueConditions" value="GSM205769.CEL,GSM205772.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL,GSM205771.CEL,GSM205770.CEL"/>\n+        </repeat>\n+        <repeat name="valueList">\n+          <param name="valueName" value="KO" />\n+          <param name="valueConditions" value="GSM205777.CEL,GSM205776.CEL,GSM205781.CEL,GSM205773.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205775.CEL,GSM205774.CEL,GSM205778.CEL"/>\n+        </repeat>\n+      </repeat>\n+      <repeat name="factorList">\n+        <param name="factorName" value="Treatment" />\n+        <repeat name="valueList">\n+          <param name="valueName" value="Control" />\n+          <param name="valueConditions" value="GSM205777.CEL,GSM205776.CEL,GSM205773.CEL,GSM205775.CEL,GSM205774.CEL,GSM205768.CEL,GSM205767.CEL,GSM205766.CEL"/>\n+        </repeat>\n+        <repeat name="valueList">\n+          <param name="valueName" value="Treat" />\n+          <param name="valueConditions" value="GSM205781.CEL,GSM205769.CEL,GSM205772.CEL,GSM205780.CEL,GSM205779.CEL,GSM205782.CEL,GSM205778.CEL,GSM205771.CEL,GSM205770.CEL"/>\n+      </repeat>\n+      </repeat>\n+    </conditional>\n+    <output name="outputData" file="./FactorFileGenerator/output/conditionsFile.csv" />\n+    <output name="log" file="./FactorFileGenerator/output/outputLog.txt" />    \n   </test>\n-</tests> \n+ </tests> \n+\n+\n+\n   <help>\n <![CDATA[\n **What it does ?**\n@@ -111,7 +209,17 @@\n \n OR\n \n-- **.CEL files** of your study (you should select multiple .CEL files or unique collection file).\n+- **.CEL files** of your study (you should select multiple .CEL files).\n+\n+OR\n+\n+- **.CEL file collection** of your study (you should select a unique collection file).\n+\n+  \\- **Individual selection** of files to associate to factor values.\n+  \n+    or\n+\n+  \\- **Tag selection** to associate samples sharing the same tag to factor values.\n \n \\- **Factor definition**\n \n@@ -119,7 +227,7 @@\n \n - **Value name** of different states for the current factor as \'KO\' or \'WT\' for \'Strain\' factor (please avoid special characters)\n \n-- **Select sample** to assign to current value\n+- **Select sample/tag** to assign to current value\n \n -----\n \n@@ -139,7 +247,4 @@\n \n ]]>  </help>\n \n- <citations>\n- </citations>\n-\n </tool>\n'
b
diff -r 4764dc6a1019 -r 7a520f7169e1 src/General_functions.py
--- a/src/General_functions.py Fri Jun 26 09:51:15 2020 -0400
+++ b/src/General_functions.py Wed Sep 09 10:29:24 2020 +0000
[
@@ -1,5 +1,6 @@
 import re
 import numpy as np
+import galaxy.model
 
 def get_column_names( file_path, toNotConsider=-1, each=1):
  options=[]
@@ -76,7 +77,7 @@
 
 def get_condition_file_names( file_list, toNotConsider=-1, each=1):
  options=[]
- if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+ if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
  inputfile = open(file_list.file_name)
  firstLine = next(inputfile).strip().split("\t")
  cpt=0
@@ -88,13 +89,17 @@
  if cpt==each:
  cpt=0
  inputfile.close()
- else:#if input file is a .cel file list or a collection
- if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
- for i, field_component in enumerate( file_list ):
- options.append( ( field_component.name, field_component.name, False ) )
- else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
- for i, field_component in enumerate( file_list[0].collection.elements ):
- options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
+ if isinstance(file_list,list):#if it is a list, retrieve names easily
+ for i, field_component in enumerate( file_list ):
+ options.append( ( field_component.name, field_component.name, False ) )
+ else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object
+ if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily
+ for i, field_component in enumerate( file_list.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+ else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object
+ for i, field_component in enumerate( file_list.collection.elements ):
+ options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
  return options
 
 def generateFactorFile( file_list, factor_list, outputFileName, logFile):
@@ -102,10 +107,7 @@
  outputfile = open(outputFileName, 'w')
  outputLog = open(logFile, 'w')
  sampleList=[]
- if not isinstance(file_list,list):
- conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
- else :
- conditionNames=get_condition_file_names(file_list) #.CEL files
+ conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
  for iSample, sample_component in enumerate (conditionNames):
  sampleList.append(str(sample_component[1]))
  outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")