diff keras_batch_models.xml @ 0:000a3868885b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:17:20 -0400
parents
children 62b85c8ad8fa
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/keras_batch_models.xml	Fri Aug 09 07:17:20 2019 -0400
@@ -0,0 +1,170 @@
+<tool id="keras_batch_models" name="Build Deep learning Batch Training Models" version="@KERAS_VERSION@">
+  <description>with online data generator for Genomic/Protein sequences and images</description>
+  <macros>
+    <import>main_macros.xml</import>
+    <import>keras_macros.xml</import>
+  </macros>
+  <expand macro="python_requirements"/>
+  <expand macro="macro_stdio"/>
+  <version_command>echo "@KERAS_VERSION@"</version_command>
+  <command>
+    <![CDATA[
+    python '$__tool_directory__/keras_deep_learning.py'
+           --inputs '$inputs'
+           --model_json '$mode_selection.infile_json'
+           --tool_id 'keras_batch_models'
+           --outfile '$outfile'
+           #if $get_params
+           --outfile_params '$outfile_params'
+           #end if
+    ]]>
+  </command>
+  <configfiles>
+    <inputs name="inputs"/>
+  </configfiles>
+  <inputs>
+    <conditional name="mode_selection">
+      <param name="mode_type" type="select" label="Choose a building mode">
+        <option value="train_model" selected="true">Build a training model</option>
+      </param>
+      <when value="train_model">
+        <param name="infile_json" type="data" format="json" label="Select the dataset containing model configurations (JSON)"/>
+        <param name="learning_type" type="select" label="Select a learning class">
+          <option value="KerasGBatchClassifier">KerasGBatchClassifier -- Build a training model with batch data generator</option>
+        </param>
+        <conditional name="generator_selection">
+          <param name="generator_type" type="select" label="Select a batch data generator">
+            <option value="FastaDNABatchGenerator" selected="true">FastaDNABatchGenerator -- Online transformation of DNA sequences</option>
+            <option value="FastaRNABatchGenerator">FastaRNABatchGenerator -- Online transformation of RNA sequences</option>
+            <option value="FastaProteinBatchGenerator">FastaProteinBatchGenerator -- Online transformation of Protein sequences</option>
+            <option value="GenomicIntervalBatchGenerator">GenomicIntervalBatchGenerator - Online transformation of genomic sequences from a reference genome and intervals</option>
+            <!--option value="ImageBatchGenerator">ImageBatchGenerator - Online transformation of images</option-->
+          </param>
+          <when value="FastaDNABatchGenerator">
+            <expand macro="params_fasta_dna_batch_generator"/>
+          </when>
+          <when value="FastaRNABatchGenerator">
+            <expand macro="params_fasta_dna_batch_generator"/>
+          </when>
+          <when value="FastaProteinBatchGenerator">
+            <expand macro="params_fasta_protein_batch_generator"/>
+          </when>
+          <when value="GenomicIntervalBatchGenerator">
+            <expand macro="params_genomic_interval_batch_generator"/>
+          </when>
+          <!--when value="ImageBatchGenerator">
+            <expand macro="params_image_batch_generator"/>
+          </when-->
+        </conditional>
+        <expand macro="keras_compile_params_section"/>
+        <expand macro="keras_fit_params_section"/>
+        <param name="class_positive_factor" type="float" value="1" optional="true" label="class_positive_factor" help="For binary classification only. If int, like 5, will convert to class_weight {0: 1, 1: 5}. If float, 0.2, corresponds to class_weight {0: 1/0.2, 1: 1}"/>
+        <param name="prediction_steps" type="integer" value="" optional="true" label="prediction_steps" help="Prediction steps. Optional. If None, it equals number of samples divided by `batch_size`."/>
+        <param name="random_seed" type="integer" value="" optional="true" label="Random Seed" help="Integer or blank for None. Warning: when random seed is set to an integer, training will be running in single thread mode, which may cause slowness."/>
+      </when>
+    </conditional>
+    <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?" help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/>
+  </inputs>
+  <outputs>
+    <data format="zip" name="outfile" label="Keras Batch Classifier  on ${on_string}"/>
+    <data format="tabular" name="outfile_params" label="get_params for Keras Batch Classifier on ${on_string}">
+      <filter>get_params</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <conditional name="mode_selection">
+        <param name="infile_json" value="deepsear_1feature.json" ftype="json"/>
+        <param name="learning_type" value="KerasGBatchClassifier"/>
+        <conditional name="generator_selection">
+          <param name="generator_type" value="GenomicIntervalBatchGenerator"/>
+          <param name="seed" value="999"/>
+        </conditional>
+        <section name="fit_params">
+          <param name="epochs" value="100"/>
+        </section>
+      </conditional>
+      <output name="outfile" file="keras_batch_model03" compare="sim_size" delta="5"/>
+    </test>
+    <test>
+      <conditional name="mode_selection">
+        <param name="infile_json" value="keras01.json" ftype="json"/>
+        <param name="learning_type" value="KerasGBatchClassifier"/>
+        <conditional name="generator_selection">
+          <param name="generator_type" value="FastaDNABatchGenerator"/>
+          <param name="seed" value="999"/>
+        </conditional>
+        <section name="fit_params">
+          <param name="epochs" value="100"/>
+          <repeat name="callbacks">
+            <conditional name="callback_selection">
+              <param name="callback_type" value="EarlyStopping"/>
+              <param name="monitor" value="val_loss"/>
+            </conditional>
+          </repeat>
+        </section>
+      </conditional>
+      <output name="outfile" file="keras_batch_model02" compare="sim_size" delta="5"/>
+    </test>
+    <test>
+      <conditional name="mode_selection">
+        <param name="infile_json" value="keras01.json" ftype="json"/>
+        <param name="learning_type" value="KerasGBatchClassifier"/>
+        <conditional name="generator_selection">
+          <param name="generator_type" value="FastaDNABatchGenerator"/>
+          <param name="seed" value="999"/>
+        </conditional>
+        <section name="fit_params">
+          <param name="epochs" value="100"/>
+          <repeat name="callbacks">
+            <conditional name="callback_selection">
+              <param name="callback_type" value="None"/>
+            </conditional>
+          </repeat>
+        </section>
+      </conditional>
+      <param name="get_params" value="true" />
+      <output name="outfile" file="keras_batch_model01" compare="sim_size" delta="5"/>
+      <output name="outfile_params" file="keras_batch_params01.tabular" />
+    </test>
+  </tests>
+  <help>
+      <![CDATA[
+**What does this tool do?**
+
+This tool builds deep learning training models using API `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_, which takes parameters in FIVE categories. 
+
+- a JSON file that contains layer information for a deep learning model.
+
+- a data batch generator that converts raw data, such as images and genomic sequences, into numerical data to be able to fit the deep learning model. That the cycle of `batch conversion - fitting` occur in stream mode, also called on-line transformation, guarantees the training to be CPU and memory efficient. Reference: `galaxy_ml.preprocessors.FastaDNABatchGenerator`_, `galaxy_ml.preprocessors.FastaRNABatchGenerator`_, `galaxy_ml.preprocessors.FastaProteinBatchGenerator`_, `galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`_.
+
+- compile parameters, are mainly composed of loss function and optimizer.
+
+- fit parameters, a group of variables that control the training process, referring to `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_ and `keras.io`_.
+
+- other parameters, including `class_positive_factor`, `prediction_steps`, `seed` (random seed) and so on.
+
+
+**Output**
+
+A zipped model file that could be used in `model_validation` tool or `hyperparameter search` tool.
+
+.. _`galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#kerasgbatchclassifier
+
+.. _`galaxy_ml.preprocessors.FastaDNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaDNABatchGenerator
+
+.. _`galaxy_ml.preprocessors.FastaRNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaRNABatchGenerator
+
+.. _`galaxy_ml.preprocessors.FastaProteinBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaProteinBatchGenerator
+
+.. _`galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#GenomicIntervalBatchGenerator
+
+.. _`keras.io`: https://keras.io/models/model/#fit_generator
+
+      ]]>
+  </help>
+  <citations>
+    <expand macro="keras_citation"/>
+    <expand macro="tensorflow_citation"/>
+  </citations>
+</tool>