comparison nn_classifier.xml @ 13:478034e9826b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
author bgruening
date Mon, 09 Jul 2018 14:31:48 -0400
parents d638aa11a4f0
children adec53d64383
comparison
equal deleted inserted replaced
12:20cd3b38d194 13:478034e9826b
19 import numpy as np 19 import numpy as np
20 import sklearn.neighbors 20 import sklearn.neighbors
21 import pandas 21 import pandas
22 import pickle 22 import pickle
23 23
24 @COLUMNS_FUNCTION@
25 @GET_X_y_FUNCTION@
26
24 input_json_path = sys.argv[1] 27 input_json_path = sys.argv[1]
25 params = json.load(open(input_json_path, "r")) 28 params = json.load(open(input_json_path, "r"))
26 29
27 30
28 #if $selected_tasks.selected_task == "load": 31 #if $selected_tasks.selected_task == "load":
29 32
30 classifier_object = pickle.load(open("$infile_model", 'r')) 33 classifier_object = pickle.load(open("$infile_model", 'r'))
31 34
32 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) 35 header = 'infer' if params["selected_tasks"]["header"] else None
36 data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
33 prediction = classifier_object.predict(data) 37 prediction = classifier_object.predict(data)
34 prediction_df = pandas.DataFrame(prediction) 38 prediction_df = pandas.DataFrame(prediction)
35 res = pandas.concat([data, prediction_df], axis=1) 39 res = pandas.concat([data, prediction_df], axis=1)
36 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False) 40 res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)
37 41
38 #else: 42 #else:
39 43
40 data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) 44 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
41
42 data = data_train.ix[:,0:len(data_train.columns)-1]
43 labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]])
44 45
45 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] 46 selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
46 47
47 if selected_algorithm == "nneighbors": 48 if selected_algorithm == "nneighbors":
48 classifier = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["sampling_method"] 49 classifier = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["sampling_method"]
53 options = params["selected_tasks"]["selected_algorithms"]["options"] 54 options = params["selected_tasks"]["selected_algorithms"]["options"]
54 classifier = "NearestCentroid" 55 classifier = "NearestCentroid"
55 56
56 my_class = getattr(sklearn.neighbors, classifier) 57 my_class = getattr(sklearn.neighbors, classifier)
57 classifier_object = my_class(**options) 58 classifier_object = my_class(**options)
58 classifier_object.fit(data,labels) 59 classifier_object.fit(X, y)
59 60
60 pickle.dump(classifier_object,open("$outfile_fit", 'w+')) 61 pickle.dump(classifier_object,open("$outfile_fit", 'w+'))
61 62
62 #end if 63 #end if
63 64
64 ]]> 65 ]]>
65 </configfile> 66 </configfile>
66 </configfiles> 67 </configfiles>
67 <inputs> 68 <inputs>
68 <expand macro="train_loadConditional" model="zip"><!--Todo: add sparse to targets--> 69 <expand macro="sl_Conditional" model="zip"><!--Todo: add sparse to targets-->
69 <param name="selected_algorithm" type="select" label="Classifier type"> 70 <param name="selected_algorithm" type="select" label="Classifier type">
70 <option value="nneighbors">Nearest Neighbors</option> 71 <option value="nneighbors">Nearest Neighbors</option>
71 <option value="ncentroid">Nearest Centroid</option> 72 <option value="ncentroid">Nearest Centroid</option>
72 </param> 73 </param>
73 <when value="nneighbors"> 74 <when value="nneighbors">
75 <expand macro="sl_mixed_input"/>
74 <conditional name="sampling_methods"> 76 <conditional name="sampling_methods">
75 <param name="sampling_method" type="select" label="Neighbor selection method"> 77 <param name="sampling_method" type="select" label="Neighbor selection method">
76 <option value="KNeighborsClassifier" selected="true">K-nearest neighbors</option> 78 <option value="KNeighborsClassifier" selected="true">K-nearest neighbors</option>
77 <option value="RadiusNeighborsClassifier">Radius-based</option> 79 <option value="RadiusNeighborsClassifier">Radius-based</option>
78 </param> 80 </param>
88 </expand> 90 </expand>
89 </when> 91 </when>
90 </conditional> 92 </conditional>
91 </when> 93 </when>
92 <when value="ncentroid"> 94 <when value="ncentroid">
95 <expand macro="sl_mixed_input"/>
93 <section name="options" title="Advanced Options" expanded="False"> 96 <section name="options" title="Advanced Options" expanded="False">
94 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" 97 <param argument="metric" type="text" optional="true" value="euclidean" label="Metric"
95 help="The metric to use when calculating distance between instances in a feature array."/> 98 help="The metric to use when calculating distance between instances in a feature array."/>
96 <param argument="shrink_threshold" type="float" optional="true" value="" label="Shrink threshold" 99 <param argument="shrink_threshold" type="float" optional="true" value="" label="Shrink threshold"
97 help="Floating point number for shrinking centroids to remove features."/> 100 help="Floating point number for shrinking centroids to remove features."/>
102 105
103 <expand macro="output"/> 106 <expand macro="output"/>
104 107
105 <tests> 108 <tests>
106 <test> 109 <test>
107 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 110 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
111 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
112 <param name="header1" value="True"/>
113 <param name="header2" value="True"/>
114 <param name="col1" value="1,2,3,4"/>
115 <param name="col2" value="5"/>
108 <param name="selected_task" value="train"/> 116 <param name="selected_task" value="train"/>
109 <param name="selected_algorithm" value="nneighbors"/> 117 <param name="selected_algorithm" value="nneighbors"/>
110 <param name="sampling_method" value="KNeighborsClassifier" /> 118 <param name="sampling_method" value="KNeighborsClassifier" />
111 <param name="algorithm" value="brute" /> 119 <param name="algorithm" value="brute" />
112 <output name="outfile_fit" file="nn_model01.txt"/> 120 <output name="outfile_fit" file="nn_model01.txt"/>
113 </test> 121 </test>
114 <test> 122 <test>
115 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 123 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
124 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
125 <param name="header1" value="True"/>
126 <param name="header2" value="True"/>
127 <param name="col1" value="1,2,3,4"/>
128 <param name="col2" value="5"/>
116 <param name="selected_task" value="train"/> 129 <param name="selected_task" value="train"/>
117 <param name="selected_algorithm" value=""/> 130 <param name="selected_algorithm" value=""/>
118 <param name="selected_algorithm" value="nneighbors"/> 131 <param name="selected_algorithm" value="nneighbors"/>
119 <param name="sampling_method" value="RadiusNeighborsClassifier" /> 132 <param name="sampling_method" value="RadiusNeighborsClassifier" />
120 <output name="outfile_fit" file="nn_model02.txt"/> 133 <output name="outfile_fit" file="nn_model02.txt"/>
121 </test> 134 </test>
122 <test> 135 <test>
123 <param name="infile_train" value="train_set.tabular" ftype="tabular"/> 136 <param name="infile1" value="train_set.tabular" ftype="tabular"/>
137 <param name="infile2" value="train_set.tabular" ftype="tabular"/>
138 <param name="header1" value="True"/>
139 <param name="header2" value="True"/>
140 <param name="col1" value="1,2,3,4"/>
141 <param name="col2" value="5"/>
124 <param name="selected_task" value="train"/> 142 <param name="selected_task" value="train"/>
125 <param name="selected_algorithm" value="ncentroid"/> 143 <param name="selected_algorithm" value="ncentroid"/>
126 <output name="outfile_fit" file="nn_model03.txt"/> 144 <output name="outfile_fit" file="nn_model03.txt"/>
127 </test> 145 </test>
128 <test> 146 <test>
129 <param name="infile_model" value="nn_model01.txt" ftype="txt"/> 147 <param name="infile_model" value="nn_model01.txt" ftype="txt"/>
130 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 148 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
149 <param name="header" value="True"/>
131 <param name="selected_task" value="load"/> 150 <param name="selected_task" value="load"/>
132 <output name="outfile_predict" file="nn_prediction_result01.tabular"/> 151 <output name="outfile_predict" file="nn_prediction_result01.tabular"/>
133 </test> 152 </test>
134 <test> 153 <test>
135 <param name="infile_model" value="nn_model02.txt" ftype="txt"/> 154 <param name="infile_model" value="nn_model02.txt" ftype="txt"/>
136 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 155 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
156 <param name="header" value="True"/>
137 <param name="selected_task" value="load"/> 157 <param name="selected_task" value="load"/>
138 <output name="outfile_predict" file="nn_prediction_result02.tabular"/> 158 <output name="outfile_predict" file="nn_prediction_result02.tabular"/>
139 </test> 159 </test>
140 <test> 160 <test>
141 <param name="infile_model" value="nn_model03.txt" ftype="txt"/> 161 <param name="infile_model" value="nn_model03.txt" ftype="txt"/>
142 <param name="infile_data" value="test_set.tabular" ftype="tabular"/> 162 <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
163 <param name="header" value="True"/>
143 <param name="selected_task" value="load"/> 164 <param name="selected_task" value="load"/>
144 <output name="outfile_predict" file="nn_prediction_result03.tabular"/> 165 <output name="outfile_predict" file="nn_prediction_result03.tabular"/>
145 </test> 166 </test>
146 </tests> 167 </tests>
147 <help><![CDATA[ 168 <help><![CDATA[