diff svm.xml @ 5:1c5989b930e3 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author bgruening
date Sat, 29 Sep 2018 07:26:04 -0400
parents 41d0edb7d1fc
children 1a9d5a8fff12
line wrap: on
line diff
--- a/svm.xml	Thu Aug 23 16:14:13 2018 -0400
+++ b/svm.xml	Sat Sep 29 07:26:04 2018 -0400
@@ -20,8 +20,9 @@
 import sklearn.svm
 import pandas
 
-execfile("$__tool_directory__/sk_whitelist.py")
-execfile("$__tool_directory__/utils.py", globals())
+with open("$__tool_directory__/sk_whitelist.json", "r") as f:
+    sk_whitelist = json.load(f)
+exec(open("$__tool_directory__/utils.py").read(), globals())
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
@@ -29,11 +30,12 @@
 
 #if $selected_tasks.selected_task == "load":
 
-with open("$infile_model", 'rb') as model_handler:
-    classifier_object = SafePickler.load(model_handler)
-
 header = 'infer' if params["selected_tasks"]["header"] else None
 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
+
+with open("$infile_model", 'rb') as model_handler:
+    classifier_object = load_model(model_handler)
+
 prediction = classifier_object.predict(data)
 prediction_df = pandas.DataFrame(prediction)
 res = pandas.concat([data, prediction_df], axis=1)
@@ -55,7 +57,7 @@
 classifier_object.fit(X, y)
 
 with open("$outfile_fit", 'wb') as out_handler:
-    pickle.dump(classifier_object, out_handler, pickle.HIGHEST_PROTOCOL)
+    pickle.dump(classifier_object, out_handler)
 
 #end if
 
@@ -108,9 +110,7 @@
             </when>
         </expand>
     </inputs>
-
     <expand macro="output"/>
-
     <tests>
         <test>
             <param name="infile1" value="train_set.tabular" ftype="tabular"/>
@@ -122,7 +122,7 @@
             <param name="selected_task" value="train"/>
             <param name="selected_algorithm" value="SVC"/>
             <param name="random_state" value="5"/>
-            <output name="outfile_fit" file="svc_model01.txt" compare="sim_size" delta="1"/>
+            <output name="outfile_fit" file="svc_model01" compare="sim_size"/>
         </test>
         <test>
             <param name="infile1" value="train_set.tabular" ftype="tabular"/>
@@ -134,7 +134,7 @@
             <param name="selected_task" value="train"/>
             <param name="selected_algorithm" value="NuSVC"/>
             <param name="random_state" value="5"/>
-            <output name="outfile_fit" file="svc_model02.txt" compare="sim_size" delta="1"/>
+            <output name="outfile_fit" file="svc_model02" compare="sim_size"/>
         </test>
         <test>
             <param name="infile1" value="train_set.tabular" ftype="tabular"/>
@@ -146,29 +146,37 @@
             <param name="selected_task" value="train"/>
             <param name="selected_algorithm" value="LinearSVC"/>
             <param name="random_state" value="5"/>
-            <output name="outfile_fit" file="svc_model03.txt" compare="sim_size" delta="1"/>
+            <output name="outfile_fit" file="svc_model03" compare="sim_size"/>
         </test>
         <test>
-            <param name="infile_model" value="svc_model01.txt" ftype="zip"/>
+            <param name="infile_model" value="svc_model01" ftype="zip"/>
             <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
             <param name="header" value="True"/>
             <param name="selected_task" value="load"/>
             <output name="outfile_predict" file="svc_prediction_result01.tabular"/>
         </test>
         <test>
-            <param name="infile_model" value="svc_model02.txt" ftype="zip"/>
+            <param name="infile_model" value="svc_model02" ftype="zip"/>
             <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
             <param name="header" value="True"/>
             <param name="selected_task" value="load"/>
             <output name="outfile_predict" file="svc_prediction_result02.tabular"/>
         </test>
         <test>
-            <param name="infile_model" value="svc_model03.txt" ftype="zip"/>
+            <param name="infile_model" value="svc_model03" ftype="zip"/>
             <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
             <param name="header" value="True"/>
             <param name="selected_task" value="load"/>
             <output name="outfile_predict" file="svc_prediction_result03.tabular"/>
         </test>
+        <!-- The following test is expected to fail: it exercises the whitelist/blacklist filtering
+        by loading a pickle with malicious content, which the tool must reject. -->
+        <test expect_failure="true">
+            <param name="infile_model" value="pickle_blacklist" ftype="zip"/>
+            <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
+            <param name="header" value="True"/>
+            <param name="selected_task" value="load"/>
+        </test>
     </tests>
     <help><![CDATA[
 **What it does**