comparison model_prediction.xml @ 0:db511406350a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:11:11 -0400
parents
children af7ed4d45619
comparison
equal deleted inserted replaced
-1:000000000000 0:db511406350a
1 <tool id="model_prediction" name="Model Prediction" version="@VERSION@">
2 <description>predicts on new data using a prefitted model</description>
3 <macros> <!-- macro files imported by this tool; presumably they define the macros expanded below (definitions not visible here) -->
4 <import>main_macros.xml</import>
5 <import>keras_macros.xml</import>
6 </macros>
7 <expand macro="python_requirements"/>
8 <expand macro="macro_stdio"/>
9 <version_command>echo "@VERSION@"</version_command>
10 <command>
11 <![CDATA[
12 python '$__tool_directory__/model_prediction.py'
13 --inputs '$inputs'
14 --infile_estimator '$infile_estimator'
15 --outfile_predict '$outfile_predict'
16 --infile_weights '$infile_weights'
17 #if $input_options.selected_input == 'variant_effect'
18 --ref_seq '$input_options.ref_genome_file'
19 --vcf_path '$input_options.vcf_file'
20 #elif $input_options.selected_input == 'seq_fasta'
21 --fasta_path '$input_options.fasta_path'
22 #else
23 --infile1 '$input_options.infile1'
24 #end if
25 ]]>
26 </command> <!-- variant_effect and seq_fasta get their own file options; every other input type (tabular, sparse) falls through to the infile1 option -->
27 <configfiles> <!-- the "inputs" configfile is what the command hands to the script through its inputs option -->
28 <inputs name="inputs"/>
29 </configfiles>
30 <inputs> <!-- estimator (plus optional weights), invocation method, and one of four input data types chosen through input_options -->
31 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
32 <param name="infile_weights" type="data" format="h5" optional="true" label="Choose the dataset containing weights for the estimator above" help="Optional. For deep learning only."/>
33 <param argument="method" type="select" label="Select invocation method">
34 <option value="predict" selected="true">predict</option>
35 <option value="predict_proba">predict_proba</option>
36 </param>
37 <conditional name="input_options">
38 <param name="selected_input" type="select" label="Select input data type for prediction">
39 <option value="tabular" selected="true">tabular data</option>
40 <option value="sparse">sparse matrix</option>
41 <option value="seq_fasta">sequences in a fasta file</option>
42 <option value="variant_effect">reference genome and variant call file</option>
43 </param>
44 <when value="tabular">
45 <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
46 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
47 <conditional name="column_selector_options_1">
48 <expand macro="samples_column_selector_options" multiple="true"/>
49 </conditional>
50 </when>
51 <when value="sparse">
52 <param name="infile1" type="data" format="txt" label="Select a sparse matrix" help=""/>
53 </when>
54 <when value="seq_fasta">
55 <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays."/>
56 <param name="seq_type" type="select" label="Sequence type">
57 <option value="FastaDNABatchGenerator">DNA</option>
58 <option value="FastaRNABatchGenerator">RNA</option>
59 <option value="FastaProteinBatchGenerator">Protein</option>
60 </param>
61 </when>
62 <when value="variant_effect">
63 <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence" help="fasta"/>
64 <param name="blacklist_regions" type="select" label="blacklist regions" help="A pre-loaded list of blacklisted intervals. Refer to `selene` for details.">
65 <option value="none" selected="true">None</option>
66 <option value="hg38">hg38</option>
67 <option value="hg19">hg19</option>
68 </param>
69 <param name="vcf_file" type="data" format="vcf" label="Dataset containing sequence variations" help="vcf"/>
70 <param name="seq_length" type="integer" value="1000" label="Encoding sequence length" help="A stretch of sequence surrounding the variation position on the reference genome."/>
71 <param name="output_reference" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Predict the reference sequence?" help="If False, predict on the variant sequence."/>
72 </when>
73 </conditional>
74 </inputs>
75 <outputs> <!-- one tabular dataset; its path is passed to the script as the outfile_predict option -->
76 <data name="outfile_predict" format="tabular"/>
77 </outputs>
78 <tests>
79 <test> <!-- estimator only, tabular input with header; output compared against model_pred01.tabular -->
80 <param name="infile_estimator" ftype="zip" value="best_estimator_.zip"/>
81 <param name="method" value="predict"/>
82 <param name="infile1" ftype="tabular" value="regression_X.tabular"/>
83 <param name="header1" value="true"/>
84 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
85 <output name="outfile_predict" file="model_pred01.tabular"/>
86 </test>
87 <test> <!-- estimator plus h5 weights file on the same tabular input; output checked by column count and selected values -->
88 <param name="infile_estimator" ftype="zip" value="keras_model04"/>
89 <param name="infile_weights" ftype="h5" value="train_test_eval_weights02.h5"/>
90 <param name="method" value="predict"/>
91 <param name="infile1" ftype="tabular" value="regression_X.tabular"/>
92 <param name="header1" value="true"/>
93 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
94 <output name="outfile_predict">
95 <assert_contents>
96 <has_n_columns n="1"/>
97 <has_text text="66.936"/>
98 <has_text text="59.94"/>
99 <has_text text="66.19"/>
100 <has_text text="56.82"/>
101 <has_text text="74.907"/>
102 </assert_contents>
103 </output>
104 </test>
105 </tests>
106 <help>
107 <![CDATA[
108 **What it does**
109
110 Given a fitted estimator and new data sets, this tool outputs the prediction results on the data sets by invoking the estimator's `predict` or `predict_proba` method.
111
112 For the estimator, this tool supports fitted sklearn estimators (pickled) and trained deep learning models (model skeleton + weights). It predicts on four different dataset inputs:
113
114 - tabular
115
116 - sparse
117
118 - bio-sequences in a fasta file
119
120 - reference genome and variant call file
121
122 ]]>
123 </help>
124 <expand macro="sklearn_citation">
125 <expand macro="keras_citation"/>
126 <expand macro="selene_citation"/>
127 </expand> <!-- citation macros; NOTE(review): the nested expands presumably fill a yield slot inside sklearn_citation, confirm against the imported macro files -->
128 </tool>