changeset 1:09efff9a5765 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 02087ce2966cf8b4aac9197a41171e7f986c11d1-dirty"
author bgruening
date Wed, 02 Oct 2019 03:50:11 -0400
parents f96efab83b65
children dd13740e8fdc
files main_macros.xml ml_visualization_ex.py ml_visualization_ex.xml stacking_ensembles.py
diffstat 4 files changed, 47 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/main_macros.xml	Fri Sep 13 12:23:39 2019 -0400
+++ b/main_macros.xml	Wed Oct 02 03:50:11 2019 -0400
@@ -421,27 +421,46 @@
 
   <xml name="sl_mixed_input">
     <conditional name="input_options">
-      <param name="selected_input" type="select" label="Select input type:">
-          <option value="tabular" selected="true">tabular data</option>
-          <option value="sparse">sparse matrix</option>
-          <option value="seq_fasta">sequnences in a fasta file</option>
-          <option value="refseq_and_interval">reference genome and intervals</option>
-      </param>
-      <when value="tabular">
-          <expand macro="samples_tabular" multiple1="true" multiple2="false"/>
-      </when>
-      <when value="sparse">
-          <expand macro="sparse_target"/>
-      </when>
-      <when value="seq_fasta">
-          <expand macro="inputs_seq_fasta"/>
-      </when>
-      <when value="refseq_and_interval">
-          <expand macro="inputs_refseq_and_interval"/>
-      </when>
+        <expand macro="data_input_options"/>
+        <expand macro="data_input_whens"/>
     </conditional>
   </xml>
 
+  <xml name="sl_mixed_input_plus_sequence">
+    <conditional name="input_options">
+        <expand macro="data_input_options">
+            <option value="seq_fasta">sequences in a fasta file</option>
+            <option value="refseq_and_interval">reference genome and intervals</option>
+        </expand>
+        <expand macro="data_input_whens">
+            <when value="seq_fasta">
+                <expand macro="inputs_seq_fasta"/>
+            </when>
+            <when value="refseq_and_interval">
+                <expand macro="inputs_refseq_and_interval"/>
+            </when>
+        </expand>
+    </conditional>
+  </xml>
+
+  <xml name="data_input_options">
+    <param name="selected_input" type="select" label="Select input type:">
+        <option value="tabular" selected="true">tabular data</option>
+        <option value="sparse">sparse matrix</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <xml name="data_input_whens">
+    <when value="tabular">
+        <expand macro="samples_tabular" multiple1="true" multiple2="false"/>
+    </when>
+    <when value="sparse">
+        <expand macro="sparse_target"/>
+    </when>
+    <yield/>
+  </xml>
+
   <xml name="input_tabular_target">
     <param name="infile2" type="data" format="tabular" label="Dataset containing class labels or target values:"/>
     <param name="header2" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
--- a/ml_visualization_ex.py	Fri Sep 13 12:23:39 2019 -0400
+++ b/ml_visualization_ex.py	Wed Oct 02 03:50:11 2019 -0400
@@ -146,7 +146,8 @@
             precision["micro"], recall["micro"], _ = precision_recall_curve(
                 df1.values.ravel(), df2.values.ravel(), pos_label=pos_label)
             ap['micro'] = average_precision_score(
-                df1.values, df2.values, average='micro', pos_label=pos_label or 1)
+                df1.values, df2.values, average='micro',
+                pos_label=pos_label or 1)
 
         data = []
         for key in precision.keys():
@@ -201,7 +202,7 @@
             )
             data.append(trace)
 
-        trace = go.Scatter(x=[0, 1], y=[0, 1], 
+        trace = go.Scatter(x=[0, 1], y=[0, 1],
                            mode='lines', 
                            line=dict(color='black', dash='dash'),
                            showlegend=False)
--- a/ml_visualization_ex.xml	Fri Sep 13 12:23:39 2019 -0400
+++ b/ml_visualization_ex.xml	Wed Oct 02 03:50:11 2019 -0400
@@ -35,8 +35,8 @@
         <conditional name="plotting_selection">
             <param name="plot_type" type="select" label="Select a plotting type">
                 <option value="learning_curve" selected="true">Learning curve</option>
-                <option value="pr_curve">2-class Precison Recall curve</option>
-                <option value="roc_curve">2-class Receiver Operating Characteristic (ROC) curve</option>
+                <option value="pr_curve">2-class / multi-label Precision Recall curve</option>
+                <option value="roc_curve">2-class / multi-label Receiver Operating Characteristic (ROC) curve</option>
                 <option value="rfecv_gridscores">Number of features vs. Recursive Feature Elimination gridscores with corss-validation</option>
                 <option value="feature_importances">Feature Importances plot</option>
                 <option value="keras_plot_model">keras plot model - plot configuration of a neural network model</option>
@@ -47,14 +47,14 @@
                 <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
             </when>
             <when value="pr_curve">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="No headers. Each column corresponds to one class."/>
-                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="No headers. Each column corresponds to one class."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="No headers. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
+                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="No headers. For 2-class, single column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
                 <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default."/>
                 <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
             </when>
             <when value="roc_curve">
-                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="No headers. Each column corresponds to one class."/>
-                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="No headers. Each column corresponds to one class."/>
+                <param name="infile1" type="data" format="tabular" label="Select the dataset containing true labels." help="No headers. For 2-class, single column contains both class labels (e.g. True and False). For multi-label, each column, hot-encoded, corresponds to one label."/>
+                <param name="infile2" type="data" format="tabular" label="Select the dataset containing predicted probabilities." help="No headers. For 2-class, single column or the first column contains scores for the positive label. For multi-label, each column corresponds to one label."/>
                 <param name="pos_label" type="text" value="" optional="true" label="pos_label" help="The label of positive class. If not specified, it will be 1 by default."/>
                 <param name="drop_intermediate" type="boolean" truevalue="booltrue" falsevalue="boolfalse" optional="true" checked="true" label="drop_intermediate" help="Whether to drop some suboptimal thresholds which would not appear on a plotted ROC curve."/>
                 <param name="title" type="text" value="" optional="true" label="Plot title" help="Optional. If change is desired."/>
--- a/stacking_ensembles.py	Fri Sep 13 12:23:39 2019 -0400
+++ b/stacking_ensembles.py	Wed Oct 02 03:50:11 2019 -0400
@@ -11,7 +11,7 @@
 from sklearn import ensemble
 
 from galaxy_ml.utils import (load_model, get_cv, get_estimator,
-                          get_search_params)
+                             get_search_params)
 
 
 warnings.filterwarnings('ignore')