comparison missing_values_sample_filter.xml @ 0:91441e41cc3d draft

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 6321871098b2c4bc9e321d20b7e66fff3d641839"
author computational-metabolomics
date Sat, 11 Apr 2020 16:47:00 -0400
parents
children 2cf12cd2c53a
comparison
equal deleted inserted replaced
-1:000000000000 0:91441e41cc3d
1 <tool id="dimspy_missing_values_sample_filter" name="Missing Values Sample Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
2 <description> - Remove samples with a high percentage of missing values</description>
<!-- Imports macros.xml. The expand elements and @TOKEN@ placeholders used in this tool
     (requirements, hdf5_pm_to_txt, outputs_peak_intensity_matrix, citations, version tokens)
     are not defined in this file; presumably they come from that import. TODO confirm. -->
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
<!-- Command template. Steps are chained so that each one runs only if the previous succeeded;
     job failure is detected from the process exit code (detect_errors="exit_code").
       1. dimspy mv-sample-filter: reads $hdf5_file_in, writes $hdf5_file_out, and receives
          $max_fraction as the value of its max-fraction option.
       2. dimspy create-sample-list: reads the filtered $hdf5_file_out and writes $samplelist,
          with the delimiter fixed to "tab".
       3. The @HDF5_PM_TO_TXT@ token is appended only when $hdf5_to_txt.standard is true, and
          @HDF5_PM_TO_TXT_COMPREHENSIVE@ only when $hdf5_to_txt.comprehensive is true. Both
          tokens are defined outside this file.
     NOTE(review): the hidden "delimiter" input declared under inputs is not referenced in this
     template; the delimiter is hard-coded here. Confirm whether the macro tokens use it. -->
7 <command detect_errors="exit_code">
8 <![CDATA[
9 dimspy mv-sample-filter
10 --input '$hdf5_file_in'
11 --output '$hdf5_file_out'
12 --max-fraction $max_fraction
13 &&
14 dimspy create-sample-list
15 --input '$hdf5_file_out'
16 --output '$samplelist'
17 --delimiter tab
18 #if $hdf5_to_txt.standard
19 &&
20 @HDF5_PM_TO_TXT@
21 #end if
22 #if $hdf5_to_txt.comprehensive
23 &&
24 @HDF5_PM_TO_TXT_COMPREHENSIVE@
25 #end if
26 ]]>
27 </command>
<!-- User-facing parameters.
       hdf5_file_in : HDF5 peak intensity matrix passed to mv-sample-filter.
       max_fraction : float restricted to 0..1 (default 0.8). The label says "percentage", so
                      the help text spells out that a fraction is expected.
       delimiter    : hidden, fixed to "tab".
     The hdf5_pm_to_txt macro is defined outside this file; the command template and the test
     address its parameters through a conditional named hdf5_to_txt. -->
28 <inputs>
29 <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
30 <param name="max_fraction" argument="--max-fraction" type="float" min="0" max="1.0" value="0.8" label="Maximum percentage of missing values." help="Enter a fraction between 0 and 1 (e.g. 0.8 = 80%). Samples whose fraction of missing values is greater than this value are removed." />
31 <param name="delimiter" argument="--delimiter" type="hidden" value="tab" label="" help=""/>
32 <expand macro="hdf5_pm_to_txt" />
33 </inputs>
<!-- Declared outputs.
       outputs_peak_intensity_matrix : macro defined outside this file. The test expects outputs
                                       named hdf5_file_out and matrix_file_out, presumably
                                       declared by this macro. TODO confirm in macros.xml.
       samplelist                    : the file written by the create-sample-list step of the
                                       command, declared with format "tsv". -->
34 <outputs>
35 <expand macro="outputs_peak_intensity_matrix" />
36 <data name="samplelist" format="tsv" label="${tool.name} on ${on_string}: Sample Metadata (updated)" />
37 </outputs>
<!-- Single functional test.
     Input: pm_as_bf_sf.h5 with max_fraction 0.8 and the tab delimiter.
     Export options: standard text export enabled, comprehensive export disabled, samples
     represented as rows, matrix attribute "intensity".
     Expected outputs: the filtered HDF5 matrix (compared with compare="sim_size"), the text
     peak matrix and the updated sample list (both with no compare attribute set). -->
38 <tests>
39 <test>
40 <param name="hdf5_file_in" value="pm_as_bf_sf.h5" ftype="h5"/>
41 <param name="max_fraction" value="0.8"/>
42 <param name="delimiter" value="tab"/>
43 <conditional name="hdf5_to_txt">
44 <param name="standard" value="True"/>
45 <param name="comprehensive" value="False"/>
46 <param name="representation_samples" value="rows"/>
47 <param name="matrix_attr" value="intensity"/>
48 </conditional>
49 <output name="hdf5_file_out" file="pm_as_bf_sf_mv.h5" ftype="h5" compare="sim_size"/>
50 <output name="matrix_file_out" file="peak_matrix_as_bf_sf_mv.txt" ftype="tsv"/>
51 <output name="samplelist" file="sample_list_after_mv_filter.txt" ftype="tsv"/>
52 </test>
53 </tests>
54 <help>
55 ----------------------------
56 Missing Values Sample Filter
57 ----------------------------
58
59 ..
60
61 ---------------------------------------------
62
63 Description
64 -----------
65
66 Standard DIMS processing workflow: Process Scans -> Replicate Filter -> Align Samples -> **[Missing values sample filter]** -> Blank Filter -> Sample Filter -> Matrix processing -> Statistics
67
68 |
69
70 This tool removes from the peak intensity matrix any study sample whose fraction of missing values is greater than the user-defined “Maximum percentage of missing values”. A missing value is defined as the absence of a recorded peak intensity value for a specific mass spectral peak, in a specific study sample.
71
72 Samples with large numbers of missing values are often observed where a failed mass spectral acquisition has occurred, the reasons for which are many and diverse.
73
74 ---------------------------------------------
75
76 Parameters
77 ----------
78
79 **Peak Intensity Matrix (HDF5 file)** (REQUIRED) - for routine usage, the input peak intensity matrix should be that generated from the 'Align samples' tool.
80
81 **Maximum percentage of missing values** (REQUIRED; default = 0.8) - a numeric value ranging from 0 to 1 (decimal representation of percentage), where:
82
83 - A value of 0 (i.e. 0%) corresponds to a very harsh filtering procedure, in which only those samples with zero missing values are retained in the output peak matrix.
84
85 - A value of 1 (i.e. 100%) corresponds to a very liberal filtering procedure, in which samples with as many as 100% missing values will be retained in the output peak matrix.
86
87 |
88
89 @help_options_addtional_output@
90
91 |
92
93 ---------------------------------------------
94
95 Output file(s)
96 --------------
97
98 **IMPORTANT** - in all outputs except for the (optional) comprehensive output, any sample with more than the user-defined maximum percentage of missing values is removed from the output peak matrix.
99
100
101 @help_outputs_matrix@
102
103 ---------------------------------------------
104
105 @github_developers_contributors@
106 @license@
107 </help>
108 <expand macro="citations" />
109 </tool>
110