Mercurial > repos > bgruening > sklearn_svm_classifier
diff svm.xml @ 5:1c5989b930e3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2a058459e6daf0486871f93845f00fdb4a4eaca1
author | bgruening |
---|---|
date | Sat, 29 Sep 2018 07:26:04 -0400 |
parents | 41d0edb7d1fc |
children | 1a9d5a8fff12 |
line wrap: on
line diff
--- a/svm.xml Thu Aug 23 16:14:13 2018 -0400 +++ b/svm.xml Sat Sep 29 07:26:04 2018 -0400 @@ -20,8 +20,9 @@ import sklearn.svm import pandas -execfile("$__tool_directory__/sk_whitelist.py") -execfile("$__tool_directory__/utils.py", globals()) +with open("$__tool_directory__/sk_whitelist.json", "r") as f: + sk_whitelist = json.load(f) +exec(open("$__tool_directory__/utils.py").read(), globals()) input_json_path = sys.argv[1] with open(input_json_path, "r") as param_handler: @@ -29,11 +30,12 @@ #if $selected_tasks.selected_task == "load": -with open("$infile_model", 'rb') as model_handler: - classifier_object = SafePickler.load(model_handler) - header = 'infer' if params["selected_tasks"]["header"] else None data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) + +with open("$infile_model", 'rb') as model_handler: + classifier_object = load_model(model_handler) + prediction = classifier_object.predict(data) prediction_df = pandas.DataFrame(prediction) res = pandas.concat([data, prediction_df], axis=1) @@ -55,7 +57,7 @@ classifier_object.fit(X, y) with open("$outfile_fit", 'wb') as out_handler: - pickle.dump(classifier_object, out_handler, pickle.HIGHEST_PROTOCOL) + pickle.dump(classifier_object, out_handler) #end if @@ -108,9 +110,7 @@ </when> </expand> </inputs> - <expand macro="output"/> - <tests> <test> <param name="infile1" value="train_set.tabular" ftype="tabular"/> @@ -122,7 +122,7 @@ <param name="selected_task" value="train"/> <param name="selected_algorithm" value="SVC"/> <param name="random_state" value="5"/> - <output name="outfile_fit" file="svc_model01.txt" compare="sim_size" delta="1"/> + <output name="outfile_fit" file="svc_model01" compare="sim_size"/> </test> <test> <param name="infile1" value="train_set.tabular" ftype="tabular"/> @@ -134,7 +134,7 @@ <param name="selected_task" value="train"/> <param name="selected_algorithm" value="NuSVC"/> <param name="random_state" value="5"/> - <output name="outfile_fit" file="svc_model02.txt" compare="sim_size" delta="1"/> + <output name="outfile_fit" file="svc_model02" compare="sim_size"/> </test> <test> <param name="infile1" value="train_set.tabular" ftype="tabular"/> @@ -146,29 +146,37 @@ <param name="selected_task" value="train"/> <param name="selected_algorithm" value="LinearSVC"/> <param name="random_state" value="5"/> - <output name="outfile_fit" file="svc_model03.txt" compare="sim_size" delta="1"/> + <output name="outfile_fit" file="svc_model03" compare="sim_size"/> </test> <test> - <param name="infile_model" value="svc_model01.txt" ftype="zip"/> + <param name="infile_model" value="svc_model01" ftype="zip"/> <param name="infile_data" value="test_set.tabular" ftype="tabular"/> <param name="header" value="True"/> <param name="selected_task" value="load"/> <output name="outfile_predict" file="svc_prediction_result01.tabular"/> </test> <test> - <param name="infile_model" value="svc_model02.txt" ftype="zip"/> + <param name="infile_model" value="svc_model02" ftype="zip"/> <param name="infile_data" value="test_set.tabular" ftype="tabular"/> <param name="header" value="True"/> <param name="selected_task" value="load"/> <output name="outfile_predict" file="svc_prediction_result02.tabular"/> </test> <test> - <param name="infile_model" value="svc_model03.txt" ftype="zip"/> + <param name="infile_model" value="svc_model03" ftype="zip"/> <param name="infile_data" value="test_set.tabular" ftype="tabular"/> <param name="header" value="True"/> <param name="selected_task" value="load"/> <output name="outfile_predict" file="svc_prediction_result03.tabular"/> </test> + <!-- The following test is expected to fail, it is testing the whitelist/blacklist filtering. + It loads a pickle with malicious content that we do not accept. --> + <test expect_failure="true"> + <param name="infile_model" value="pickle_blacklist" ftype="zip"/> + <param name="infile_data" value="test_set.tabular" ftype="tabular"/> + <param name="header" value="True"/> + <param name="selected_task" value="load"/> + </test> </tests> <help><![CDATA[ **What it does**