comparison sample.xml @ 0:447272175720 draft default tip

"planemo upload for repository https://github.com/shenwei356/csvtk commit 3a97e1b79bf0c6cdd37d5c8fb497b85531a563ab"
author nml
date Tue, 19 May 2020 17:12:29 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:447272175720
1 <tool id="csvtk_sample" name="csvtk-sample" version="@VERSION@+@GALAXY_VERSION@">
2 <description> random proportion of dataset</description>
3 <macros> <!-- shared definitions are pulled in from a separate macros file -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" /> <!-- macro body is not visible here; presumably the tool's package requirements - confirm in macros.xml -->
7 <expand macro="version_cmd" /> <!-- macro body is not visible here; presumably the version command - confirm in macros.xml -->
8 <command detect_errors="exit_code"><![CDATA[
9
10 ###################
11 ## Start Command ##
12 ###################
13
14 csvtk sample --num-cpus "\${GALAXY_SLOTS:-1}"
15
16 ## Global flags chosen in the global_param section ##
17 #####################################################
18 $global_param.illegal_rows
19 $global_param.empty_rows
20 $global_param.header
21 $global_param.lazy_quotes
22
23 ## Set Tabular input/output flag if input is tabular ##
24 #######################################################
25 #if $in_1.is_of_type("tabular"):
26 -t -T
27 #end if
28
29 ## Set Input ##
30 ###############
31 '$in_1'
32
33 ## Sampling options: proportion, seed, optional line-number column ##
34 #####################################################################
35 -p '$proportion'
36 -s '$seed'
37 $line_number
38
39 ## To output: stdout only, so messages on stderr stay out of the dataset ##
40 ##########################################################################
41 > sampled
42
43 ]]></command>
44 <inputs>
45 <expand macro="singular_input" /> <!-- macro from macros.xml; presumably declares the in_1 dataset used by the command - confirm -->
46 <param name="proportion" type="float" argument="-p" value="0.5" min="0" max="1"
47 label="Proportion of Data to Sample"
48 help="Fraction of rows to keep; must be greater than 0 and no more than 1">
49 <validator type="in_range" min="0" exclude_min="true" max="1" message="Proportion must be greater than 0 and no more than 1" /> <!-- makes the lower bound exclusive, which min="0" alone cannot express; NOTE(review): csvtk sample is understood to reject 0 - confirm -->
50 </param>
51 <param name="seed" type="integer" argument="-s" value="1900"
52 label="Random Seed"
53 help="Specify a seed number to sample data with"
54 />
55 <param name="line_number" type="boolean" checked="false" argument="-n"
56 truevalue="-n"
57 falsevalue=""
58 label="Create column with original line numbers of sampled data"
59 />
60 <expand macro="global_parameters" /> <!-- macro from macros.xml; presumably declares the global_param section read by the command - confirm -->
61 </inputs>
62 <outputs> <!-- single dataset, collected from the "sampled" file the command writes in the working directory -->
63 <data name="sampled" from_work_dir="sampled" format_source="in_1" label="${proportion} of ${in_1.name} sampled" />
64 </outputs>
65 <tests>
66 <test> <!-- half of plot.csv with a fixed seed, no line-number column -->
67 <param name="in_1" value="plot.csv" />
68 <param name="proportion" value="0.5" />
69 <param name="seed" value="11" />
70 <output name="sampled" file="sampled_1.csv" />
71 </test>
72 <test> <!-- 0.7 of plot.csv with the same seed, line-number column switched on -->
73 <param name="in_1" value="plot.csv" />
74 <param name="proportion" value="0.7" />
75 <param name="seed" value="11" />
76 <param name="line_number" value="true" />
77 <output name="sampled" file="sampled_2.csv" />
78 </test>
79 </tests>
80 <help><![CDATA[
81
82 Csvtk - Sample Help
83 -------------------
84
85 Info
86 ####
87
88 Csvtk-sample randomly samples a proportion of the rows in a dataset. Using the same seed, input, and proportion reproduces the same sample, and the sampled subset can then be used in further analysis.
89
90 .. class:: warningmark
91
92 Single quotes are not allowed in text inputs!
93
94 @HELP_INPUT_DATA@
95
96
97 Usage
98 #####
99
100 To run csvtk-sample, all you need is a valid (as defined above) CSV or TSV file.
101
102 **Example**
103
104 Input table:
105
106 +-------+--------+
107 | Group | Length |
108 +=======+========+
109 | 1     | 1500   |
110 +-------+--------+
111 | 2     | 1000   |
112 +-------+--------+
113 | 1     | 1500   |
114 +-------+--------+
115 | 3     | 2000   |
116 +-------+--------+
117
118 To sample a 0.5 proportion (50%) of the rows, set the proportion (-p) to 0.5 and choose any seed.
119
120 Our output could then look like this:
121
122 +-------+--------+
123 | Group | Length |
124 +=======+========+
125 | 1     | 1500   |
126 +-------+--------+
127 | 3     | 2000   |
128 +-------+--------+
129
130 If we used the same seed, input, and proportion with the "Create column with original line numbers of sampled data"
131 option set to yes, we would get the following table:
132
133 +---+-------+--------+
134 | n | Group | Length |
135 +===+=======+========+
136 | 1 | 1     | 1500   |
137 +---+-------+--------+
138 | 4 | 3     | 2000   |
139 +---+-------+--------+
140
141 --------
142
143
144 @HELP_COLUMNS@
145
146
147 @HELP_END_STATEMENT@
148
149
150 ]]></help>
151 <expand macro="citations" />
152 </tool>