view keras_batch_models.xml @ 1:ed4d31f47d65 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
author bgruening
date Fri, 13 Sep 2019 12:14:17 -0400
parents 000a3868885b
children 62b85c8ad8fa
line wrap: on
line source

<tool id="keras_batch_models" name="Build Deep learning Batch Training Models" version="@KERAS_VERSION@">
  <description>with online data generator for Genomic/Protein sequences and images</description>
  <!-- Shared macro libraries. Presumably these define the macros expanded in
       this file and the @KERAS_VERSION@ token; they are not visible here. -->
  <macros>
    <import>main_macros.xml</import>
    <import>keras_macros.xml</import>
  </macros>
  <expand macro="python_requirements" />
  <expand macro="macro_stdio" />
  <!-- Prints the Keras version token as the tool's version string. -->
  <version_command>echo "@KERAS_VERSION@"</version_command>
  <!-- Command template (Cheetah). Runs keras_deep_learning.py from the tool
       directory, passing the $inputs config file, the selected
       model-configuration JSON, a fixed tool id and the output path.
       The outfile_params argument is appended only when the get_params
       checkbox is ticked. -->
  <command>
    <![CDATA[
    python '$__tool_directory__/keras_deep_learning.py'
           --inputs '$inputs'
           --model_json '$mode_selection.infile_json'
           --tool_id 'keras_batch_models'
           --outfile '$outfile'
           #if $get_params
           --outfile_params '$outfile_params'
           #end if
    ]]>
  </command>
  <!-- Declares the config file referenced as $inputs in the command template. -->
  <configfiles>
    <inputs name="inputs" />
  </configfiles>
  <!-- Tool form. A single building mode (train_model) holds the model JSON,
       the learning class, the batch generator choice and the training
       options; the get_params checkbox sits outside the conditional. -->
  <inputs>
    <conditional name="mode_selection">
      <param name="mode_type" type="select" label="Choose a building mode">
        <option value="train_model" selected="true">Build a training model</option>
      </param>
      <when value="train_model">
        <param name="infile_json"
               type="data"
               format="json"
               label="Select the dataset containing model configurations (JSON)" />
        <param name="learning_type" type="select" label="Select a learning class">
          <option value="KerasGBatchClassifier">KerasGBatchClassifier -- Build a training model with batch data generator</option>
        </param>
        <!-- Each generator type expands a parameter macro; the RNA generator
             reuses the DNA parameter macro. -->
        <conditional name="generator_selection">
          <param name="generator_type" type="select" label="Select a batch data generator">
            <option value="FastaDNABatchGenerator" selected="true">FastaDNABatchGenerator -- Online transformation of DNA sequences</option>
            <option value="FastaRNABatchGenerator">FastaRNABatchGenerator -- Online transformation of RNA sequences</option>
            <option value="FastaProteinBatchGenerator">FastaProteinBatchGenerator -- Online transformation of Protein sequences</option>
            <option value="GenomicIntervalBatchGenerator">GenomicIntervalBatchGenerator - Online transformation of genomic sequences from a reference genome and intervals</option>
            <!--option value="ImageBatchGenerator">ImageBatchGenerator - Online transformation of images</option-->
          </param>
          <when value="FastaDNABatchGenerator">
            <expand macro="params_fasta_dna_batch_generator" />
          </when>
          <when value="FastaRNABatchGenerator">
            <expand macro="params_fasta_dna_batch_generator" />
          </when>
          <when value="FastaProteinBatchGenerator">
            <expand macro="params_fasta_protein_batch_generator" />
          </when>
          <when value="GenomicIntervalBatchGenerator">
            <expand macro="params_genomic_interval_batch_generator" />
          </when>
          <!--when value="ImageBatchGenerator">
            <expand macro="params_image_batch_generator"/>
          </when-->
        </conditional>
        <expand macro="keras_compile_params_section" />
        <expand macro="keras_fit_params_section" />
        <param name="class_positive_factor"
               type="float"
               value="1"
               optional="true"
               label="class_positive_factor"
               help="For binary classification only. If int, like 5, will convert to class_weight {0: 1, 1: 5}. If float, 0.2, corresponds to class_weight {0: 1/0.2, 1: 1}" />
        <param name="prediction_steps"
               type="integer"
               value=""
               optional="true"
               label="prediction_steps"
               help="Prediction steps. Optional. If None, it equals number of samples divided by `batch_size`." />
        <param name="random_seed"
               type="integer"
               value=""
               optional="true"
               label="Random Seed"
               help="Integer or blank for None. Warning: when random seed is set to an integer, training will be running in single thread mode, which may cause slowness." />
      </when>
    </conditional>
    <!-- Drives both the optional command-line argument and the filter on the
         outfile_params output. -->
    <param name="get_params"
           type="boolean"
           truevalue="booltrue"
           falsevalue="boolfalse"
           checked="false"
           label="Output parameters for searchCV?"
           help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool." />
  </inputs>
  <!-- Outputs. The zipped model has no filter and is always produced; the
       tabular parameter listing is produced only when the get_params
       checkbox is ticked. -->
  <outputs>
    <!-- Label fixed: the original had a stray double space before "on". -->
    <data name="outfile" format="zip" label="Keras Batch Classifier on ${on_string}"/>
    <data name="outfile_params" format="tabular" label="get_params for Keras Batch Classifier on ${on_string}">
      <filter>get_params</filter>
    </data>
  </outputs>
  <!-- Functional tests. Model archives are compared by size (sim_size with a
       delta of 5) rather than by content. -->
  <tests>
    <!-- Case 1: genomic interval generator, 100 epochs, no callbacks specified. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="deepsear_1feature.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="GenomicIntervalBatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model03" compare="sim_size" delta="5" />
    </test>
    <!-- Case 2: FASTA DNA generator with an EarlyStopping callback monitoring val_loss. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras01.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="FastaDNABatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="EarlyStopping" />
              <param name="monitor" value="val_loss" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <output name="outfile" file="keras_batch_model02" compare="sim_size" delta="5" />
    </test>
    <!-- Case 3: FASTA DNA generator, callback type None, with get_params enabled
         so the parameter table is checked as well. -->
    <test>
      <conditional name="mode_selection">
        <param name="infile_json" value="keras01.json" ftype="json" />
        <param name="learning_type" value="KerasGBatchClassifier" />
        <conditional name="generator_selection">
          <param name="generator_type" value="FastaDNABatchGenerator" />
          <param name="seed" value="999" />
        </conditional>
        <section name="fit_params">
          <param name="epochs" value="100" />
          <repeat name="callbacks">
            <conditional name="callback_selection">
              <param name="callback_type" value="None" />
            </conditional>
          </repeat>
        </section>
      </conditional>
      <param name="get_params" value="true" />
      <output name="outfile" file="keras_batch_model01" compare="sim_size" delta="5" />
      <output name="outfile_params" file="keras_batch_params01.tabular" />
    </test>
  </tests>
  <help>
      <![CDATA[
**What does this tool do?**

This tool builds deep learning training models using the `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_ API, which takes parameters in five categories:

- a JSON file that contains layer information for a deep learning model.

- a data batch generator that converts raw data, such as images and genomic sequences, into numerical data that the deep learning model can be fitted on. The `batch conversion - fitting` cycle runs in stream mode (also called on-line transformation), which keeps training CPU and memory efficient. Reference: `galaxy_ml.preprocessors.FastaDNABatchGenerator`_, `galaxy_ml.preprocessors.FastaRNABatchGenerator`_, `galaxy_ml.preprocessors.FastaProteinBatchGenerator`_, `galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`_.

- compile parameters, mainly composed of the loss function and the optimizer.

- fit parameters, a group of variables that control the training process; see `galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`_ and `keras.io`_.

- other parameters, including `class_positive_factor`, `prediction_steps`, `seed` (random seed) and so on.


**Output**

A zipped model file that can be used in the `model_validation` tool or the `hyperparameter search` tool.

.. _`galaxy_ml.keras_galaxy_model.KerasGBatchClassifier`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#kerasgbatchclassifier

.. _`galaxy_ml.preprocessors.FastaDNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaDNABatchGenerator

.. _`galaxy_ml.preprocessors.FastaRNABatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaRNABatchGenerator

.. _`galaxy_ml.preprocessors.FastaProteinBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#FastaProteinBatchGenerator

.. _`galaxy_ml.preprocessors.GenomicIntervalBatchGenerator`: https://goeckslab.github.io/Galaxy-ML/APIs/keras-galaxy-models/#GenomicIntervalBatchGenerator

.. _`keras.io`: https://keras.io/models/model/#fit_generator

      ]]>
  </help>
  <!-- Citation entries are expanded from macros. -->
  <citations>
    <expand macro="keras_citation" />
    <expand macro="tensorflow_citation" />
  </citations>
</tool>