Mercurial > repos > computational-metabolomics > dimspy_missing_values_sample_filter
diff missing_values_sample_filter.xml @ 0:91441e41cc3d draft
"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 6321871098b2c4bc9e321d20b7e66fff3d641839"
author | computational-metabolomics |
---|---|
date | Sat, 11 Apr 2020 16:47:00 -0400 |
parents | |
children | 2cf12cd2c53a |
line wrap: on
line diff
# New file: Galaxy tool wrapper "Missing Values Sample Filter"
# (tool id: dimspy_missing_values_sample_filter); the hunk below adds all 110 lines.
#
# The <command> runs `dimspy mv-sample-filter` on the input HDF5 peak matrix with
# --max-fraction, then `dimspy create-sample-list` on the filtered matrix to write a
# tab-delimited sample list. The @HDF5_PM_TO_TXT@ / @HDF5_PM_TO_TXT_COMPREHENSIVE@
# commands are appended only when the matching hdf5_to_txt option is set; those tokens
# and the expanded macros come from the imported macros.xml, which is not part of this diff.
#
# NOTE(review): the indentation of the added lines is a conventional 4-space
# reconstruction (the page text had collapsed all leading whitespace), and hunk line 30
# (the max_fraction <param>) was rejoined from a mid-line break - confirm both against
# the repository file before applying this as a patch.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/missing_values_sample_filter.xml Sat Apr 11 16:47:00 2020 -0400
@@ -0,0 +1,110 @@
+<tool id="dimspy_missing_values_sample_filter" name="Missing Values Sample Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
+    <description> - Remove samples with a high percentage of missing values</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+    <![CDATA[
+        dimspy mv-sample-filter
+        --input '$hdf5_file_in'
+        --output '$hdf5_file_out'
+        --max-fraction $max_fraction
+        &&
+        dimspy create-sample-list
+        --input '$hdf5_file_out'
+        --output '$samplelist'
+        --delimiter tab
+        #if $hdf5_to_txt.standard
+        &&
+        @HDF5_PM_TO_TXT@
+        #end if
+        #if $hdf5_to_txt.comprehensive
+        &&
+        @HDF5_PM_TO_TXT_COMPREHENSIVE@
+        #end if
+    ]]>
+    </command>
+    <inputs>
+        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
+        <param name="max_fraction" argument="--max-fraction" type="float" min="0" max="1.0" value="0.8" label="Maximum percentage of missing values." help="" />
+        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" label="" help=""/>
+        <expand macro="hdf5_pm_to_txt" />
+    </inputs>
+    <outputs>
+        <expand macro="outputs_peak_intensity_matrix" />
+        <data name="samplelist" format="tsv" label="${tool.name} on ${on_string}: Sample Metadata (updated)" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="hdf5_file_in" value="pm_as_bf_sf.h5" ftype="h5"/>
+            <param name="max_fraction" value="0.8"/>
+            <param name="delimiter" value="tab"/>
+            <conditional name="hdf5_to_txt">
+                <param name="standard" value="True"/>
+                <param name="comprehensive" value="False"/>
+                <param name="representation_samples" value="rows"/>
+                <param name="matrix_attr" value="intensity"/>
+            </conditional>
+            <output name="hdf5_file_out" file="pm_as_bf_sf_mv.h5" ftype="h5" compare="sim_size"/>
+            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_mv.txt" ftype="tsv"/>
+            <output name="samplelist" file="sample_list_after_mv_filter.txt" ftype="tsv"/>
+        </test>
+    </tests>
+    <help>
+----------------------------
+Missing Values Sample Filter
+----------------------------
+
+..
+
+---------------------------------------------
+
+Description
+-----------
+
+Standard DIMS processing workflow: Process Scans -> Replicate Filter -> Align Samples -> **[Missing values sample filter]** -> Blank Filter -> Sample Filter -> Matrix processing -> Statistics
+
+|
+
+This tool is used to remove study samples with greater-than a user-defined “Maximum percentage of missing values” from the peak intensity matrix. A missing value is defined as the absence of a recorded peak intensity value for a specific mass spectral peak, in a specific study sample.
+
+Samples with large numbers of missing values are often observed where a failed mass spectral acquisition has occurred, the reasons for which are many and diverse.
+
+---------------------------------------------
+
+Parameters
+----------
+
+**Peak Intensity Matrix (HDF5 file)** (REQUIRED) - for routine usage, the input peak intensity matrix should be that generated from the 'Align samples' tool.
+
+**Maximum percentage of missing values** (REQUIRED; default = 0.8) - a numeric value ranging from 0 to 1 (decimal representation of percentage), where:
+
+    - A value of 0 (i.e. 0%) corresponds to a very harsh filtering procedure, in which only those samples with zero missing values are retained in the output peak matrix.
+
+    - A value of 1 (i.e. 100%) corresponds to a very liberal filtering procedure, in which samples with as many as 100% missing values will be retained in the output peak matrix.
+
+|
+
+@help_options_addtional_output@
+
+|
+
+---------------------------------------------
+
+Output file(s)
+--------------
+
+**IMPORTANT** - in all outputs except for the (optional) comprehensive output, if a sample had greater-than the user defined maximum percentage of missing values, then this sample is removed from the output peak matrix.
+
+
+@help_outputs_matrix@
+
+---------------------------------------------
+
+@github_developers_contributors@
+@license@
+    </help>
+    <expand macro="citations" />
+</tool>
+