Mercurial > repos > anmoljh > feature_selection
comparison feature_selection.xml @ 0:b4d2524e79ab draft
planemo upload commit a1f4dd8eb560c649391ada1a6bb9505893a35272
author | anmoljh |
---|---|
date | Fri, 01 Jun 2018 05:16:19 -0400 |
parents | |
children | f3aeeb15d4cc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b4d2524e79ab |
---|---|
<tool id="feature_selection" name="Feature Selector" version="1.0">
    <description>This tool selects best features which are used as input for model building.</description>

    <requirements>
        <requirement type="package" version="3.2.1">R</requirement>
        <requirement type="package" version="1.0">carettools</requirement>
    </requirements>

    <!-- Any non-zero exit code from the script marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <!--
        Runs feature_selection.R from the tool directory. Arguments are positional,
        in this order: input, profile, finalset, function1, resampling, repeat,
        number, corcutoff, SAMPLING, slot count.
        The command is wrapped in CDATA so shell metacharacters need no XML escaping.
        The backslash before ${GALAXY_SLOTS:-1} keeps the template engine from
        expanding it, so the shell resolves it at run time (falling back to 1).
        Standard output is discarded; standard error is left attached to the job so
        that a failure reported through the exit code comes with a diagnostic.
    -->
    <command><![CDATA[
        Rscript '$__tool_directory__/feature_selection.R'
            '$input'
            '$profile'
            '$finalset'
            $function1
            $resampling
            $repeat
            $number
            $corcutoff
            $SAMPLING
            \${GALAXY_SLOTS:-1}
            > /dev/null
    ]]></command>

    <inputs>
        <param name="input" type="data" format="rdata" label="Select input data file" help="input .RData file" />

        <!--
            NOTE(review): "No Sampling" is passed to the script as the literal
            value "garBage"; presumably the script treats any value other than
            downsampling/upsampling as no sampling. Confirm against
            feature_selection.R before renaming this value.
        -->
        <param name="SAMPLING" type="select" label="Select Sampling Method for imbalanced data" help="Default is no sampling. You may choose downsample or upsample.">
            <option value="garBage" selected="true">No Sampling</option>
            <option value="downsampling">downsample</option>
            <option value="upsampling">upsample</option>
        </param>

        <param name="function1" type="select" display="radio" label="Select appropriate function for algorithm">
            <option value="rfFuncs" selected="true">random forest based function</option>
            <option value="lmFuncs">linear model based function</option>
            <option value="treebagFuncs">treebag(CART) based function</option>
            <option value="nbFuncs">naive Bayes based function</option>
        </param>

        <param name="corcutoff" type="float" value="0.8" min="0.0" max="1.0" label="Select correlation cutoff" help="Values between 0 and 1. Fields above the cutoff value are removed from the data." />

        <param name="resampling" type="select" label="Select appropriate resampling method">
            <option value="repeatedcv" selected="true">repeatedcv</option>
            <option value="boot">boot</option>
            <option value="cv">cv</option>
            <option value="boot632">boot632</option>
        </param>

        <param name="repeat" type="select" label="Set Number of times to repeat" help="default is 3">
            <option value="3" selected="true">3</option>
            <option value="5">5</option>
            <option value="7">7</option>
            <option value="10">10</option>
        </param>

        <param name="number" type="select" label="Set Number of times Resample" help="default is 10">
            <option value="10" selected="true">10</option>
            <option value="5">5</option>
            <option value="15">15</option>
            <option value="20">20</option>
            <option value="25">25</option>
        </param>
    </inputs>

    <outputs>
        <data format="data" name="profile" label="${function1}-profile" />
        <data format="rdata" name="finalset" label="Selected_feature.RData " />
    </outputs>

    <tests>
        <!--
            Every parameter set here must be declared in <inputs>, and every
            select value must be one of that parameter's declared options.
            Outputs are compared by size only, with a 2,000,000 byte tolerance.
        -->
        <test>
            <param name="input" value="testinput.RData" />
            <param name="function1" value="rfFuncs" />
            <param name="corcutoff" value="0.6" />
            <param name="resampling" value="repeatedcv" />
            <param name="repeat" value="3" />
            <param name="number" value="5" />
            <param name="SAMPLING" value="garBage" />
            <output name="profile" file="rfprofile.RData" compare="sim_size" delta="2000000" />
            <output name="finalset" file="selected_fet.RData" compare="sim_size" delta="2000000" />
        </test>
    </tests>

    <help><![CDATA[
.. class:: infomark

**RFE based feature selection for classification and regression**

The input file must be an RData file obtained by converting a csv file into RData.

The output "Selected_feature.RData" file is used for model building, while the profile output represents the feature selection model.

The correlation cutoff value is used for choosing independent variables. For example, a cutoff value of 0.8 removes all descriptors sharing equal or higher correlation values.

The user may choose various resampling methods in combination with repeats and times of resample.
    ]]></help>
</tool>