comparison svm.xml @ 13:78c664cc1841 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
author bgruening
date Mon, 09 Jul 2018 14:31:08 -0400
parents 9a9396e5d153
children eaccbf2c2891
comparison
equal deleted inserted replaced
12:ada7bb28fe13 13:78c664cc1841
20 import numpy as np 20 import numpy as np
21 import sklearn.svm 21 import sklearn.svm
22 import pandas 22 import pandas
23 import pickle 23 import pickle
24 24
25 @COLUMNS_FUNCTION@
26 @GET_X_y_FUNCTION@
27
25 input_json_path = sys.argv[1] 28 input_json_path = sys.argv[1]
26 params = json.load(open(input_json_path, "r")) 29 params = json.load(open(input_json_path, "r"))
27 30
28 #if $selected_tasks.selected_task == "load": 31 #if $selected_tasks.selected_task == "load":
29 32
30 classifier_object = pickle.load(open("$infile_model", 'rb')) 33 classifier_object = pickle.load(open("$infile_model", 'rb'))
31 34
32 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) 35 header = 'infer' if params["selected_tasks"]["header"] else None
36 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
33 prediction = classifier_object.predict(data) 37 prediction = classifier_object.predict(data)
34 prediction_df = pandas.DataFrame(prediction) 38 prediction_df = pandas.DataFrame(prediction)
35 res = pandas.concat([data, prediction_df], axis=1) 39 res = pandas.concat([data, prediction_df], axis=1)
36 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) 40 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
37 41
38 #else: 42 #else:
39 43
40 data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) 44 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
41
42 data = data_train.ix[:,0:len(data_train.columns)-1]
43 labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]])
44 45
45 options = params["selected_tasks"]["selected_algorithms"]["options"] 46 options = params["selected_tasks"]["selected_algorithms"]["options"]
46 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] 47 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
47 48
48 if not(selected_algorithm=="LinearSVC"): 49 if not(selected_algorithm=="LinearSVC"):
49 if options["kernel"]: 50 if options["kernel"]:
50 options["kernel"] = str(options["kernel"]) 51 options["kernel"] = str(options["kernel"])
51 52
52 my_class = getattr(sklearn.svm, selected_algorithm) 53 my_class = getattr(sklearn.svm, selected_algorithm)
53 classifier_object = my_class(**options) 54 classifier_object = my_class(**options)
54 classifier_object.fit(data,labels) 55 classifier_object.fit(X, y)
55 56
56 pickle.dump(classifier_object,open("$outfile_fit", 'w+')) 57 pickle.dump(classifier_object,open("$outfile_fit", 'w+'))
57 58
58 #end if 59 #end if
59 60
60 ]]> 61 ]]>
61 </configfile> 62 </configfile>
62 </configfiles> 63 </configfiles>
63 <inputs> 64 <inputs>
64 <expand macro="train_loadConditional" model="zip"> 65 <expand macro="sl_Conditional" model="zip">
65 <param name="selected_algorithm" type="select" label="Classifier type"> 66 <param name="selected_algorithm" type="select" label="Classifier type">
66 <option value="SVC">C-Support Vector Classification</option> 67 <option value="SVC">C-Support Vector Classification</option>
67 <option value="NuSVC">Nu-Support Vector Classification</option> 68 <option value="NuSVC">Nu-Support Vector Classification</option>
68 <option value="LinearSVC">Linear Support Vector Classification</option> 69 <option value="LinearSVC">Linear Support Vector Classification</option>
69 </param> 70 </param>
70 <when value="SVC"> 71 <when value="SVC">
72 <expand macro="sl_mixed_input"/>
71 <expand macro="svc_advanced_options"> 73 <expand macro="svc_advanced_options">
72 <expand macro="C"/> 74 <expand macro="C"/>
73 </expand> 75 </expand>
74 </when> 76 </when>
75 <when value="NuSVC"> 77 <when value="NuSVC">
78 <expand macro="sl_mixed_input"/>
76 <expand macro="svc_advanced_options"> 79 <expand macro="svc_advanced_options">
77 <param argument="nu" type="float" optional="true" value="0.5" label="Nu control parameter" help="Controls the number of support vectors. Should be in the interval (0, 1]. "/> 80 <param argument="nu" type="float" optional="true" value="0.5" label="Nu control parameter" help="Controls the number of support vectors. Should be in the interval (0, 1]. "/>
78 </expand> 81 </expand>
79 </when> 82 </when>
80 <when value="LinearSVC"> 83 <when value="LinearSVC">
84 <expand macro="sl_mixed_input"/>
81 <section name="options" title="Advanced Options" expanded="False"> 85 <section name="options" title="Advanced Options" expanded="False">
82 <expand macro="C"/> 86 <expand macro="C"/>
83 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/> 87 <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/>
84 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/> 88 <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/>
85 <!--expand macro="class_weight"/--> 89 <!--expand macro="class_weight"/-->
106 110
107 <expand macro="output"/> 111 <expand macro="output"/>
108 112
109 <tests> 113 <tests>
110 <test> 114 <test>
111 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 115 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
116 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
117 <param name="header1" value="True"/>
118 <param name="header2" value="True"/>
119 <param name="col1" value="1,2,3,4"/>
120 <param name="col2" value="5"/>
112 <param name="selected_task" value="train"/> 121 <param name="selected_task" value="train"/>
113 <param name="selected_algorithm" value="SVC"/> 122 <param name="selected_algorithm" value="SVC"/>
114 <param name="random_state" value="5"/> 123 <param name="random_state" value="5"/>
115 <output name="outfile_fit" file="svc_model01.txt"/> 124 <output name="outfile_fit" file="svc_model01.txt"/>
116 </test> 125 </test>
117 <test> 126 <test>
118 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 127 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
128 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
129 <param name="header1" value="True"/>
130 <param name="header2" value="True"/>
131 <param name="col1" value="1,2,3,4"/>
132 <param name="col2" value="5"/>
119 <param name="selected_task" value="train"/> 133 <param name="selected_task" value="train"/>
120 <param name="selected_algorithm" value="NuSVC"/> 134 <param name="selected_algorithm" value="NuSVC"/>
121 <param name="random_state" value="5"/> 135 <param name="random_state" value="5"/>
122 <output name="outfile_fit" file="svc_model02.txt"/> 136 <output name="outfile_fit" file="svc_model02.txt"/>
123 </test> 137 </test>
124 <test> 138 <test>
125 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 139 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
140 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
141 <param name="header1" value="True"/>
142 <param name="header2" value="True"/>
143 <param name="col1" value="1,2,3,4"/>
144 <param name="col2" value="5"/>
126 <param name="selected_task" value="train"/> 145 <param name="selected_task" value="train"/>
127 <param name="selected_algorithm" value="LinearSVC"/> 146 <param name="selected_algorithm" value="LinearSVC"/>
128 <param name="random_state" value="5"/> 147 <param name="random_state" value="5"/>
129 <output name="outfile_fit" file="svc_model03.txt"/> 148 <output name="outfile_fit" file="svc_model03.txt"/>
130 </test> 149 </test>
131 <test> 150 <test>
132 <param name="infile_model" value="svc_model01.txt" ftype="txt"/> 151 <param name="infile_model" value="svc_model01.txt" ftype="txt"/>
133 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 152 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
153 <param name="header" value="True"/>
134 <param name="selected_task" value="load"/> 154 <param name="selected_task" value="load"/>
135 <output name="outfile_predict" file="svc_prediction_result01.tabular"/> 155 <output name="outfile_predict" file="svc_prediction_result01.tabular"/>
136 </test> 156 </test>
137 <test> 157 <test>
138 <param name="infile_model" value="svc_model02.txt" ftype="txt"/> 158 <param name="infile_model" value="svc_model02.txt" ftype="txt"/>
139 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 159 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
160 <param name="header" value="True"/>
140 <param name="selected_task" value="load"/> 161 <param name="selected_task" value="load"/>
141 <output name="outfile_predict" file="svc_prediction_result02.tabular"/> 162 <output name="outfile_predict" file="svc_prediction_result02.tabular"/>
142 </test> 163 </test>
143 <test> 164 <test>
144 <param name="infile_model" value="svc_model03.txt" ftype="txt"/> 165 <param name="infile_model" value="svc_model03.txt" ftype="txt"/>
145 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 166 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
167 <param name="header" value="True"/>
146 <param name="selected_task" value="load"/> 168 <param name="selected_task" value="load"/>
147 <output name="outfile_predict" file="svc_prediction_result03.tabular"/> 169 <output name="outfile_predict" file="svc_prediction_result03.tabular"/>
148 </test> 170 </test>
149 </tests> 171 </tests>
150 <help><![CDATA[ 172 <help><![CDATA[