Mercurial > repos > malex > secimtools
diff bland_altman_plot.xml @ 1:2e7d47c0b027 draft
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author | malex |
---|---|
date | Mon, 08 Mar 2021 22:04:06 +0000 |
parents | |
children | caba07f41453 |
line wrap: on
line diff
<tool id="secimtools_bland_altman_plot" name="Bland-Altman (BA) Plot" version="@WRAPPER_VERSION@">
    <!--
        Galaxy wrapper around bland_altman_plot.py.
        Takes a wide-format dataset plus a design file and produces two PDF
        reports and four tabular files (sample flags, feature flags and the two
        proportion tables).
        The requirements and citations macros are expanded from the imported
        macros.xml; the @...@ tokens (wrapper version, help snippets) are
        presumably defined there as well. Confirm against macros.xml.
    -->
    <description>- Create pairwise BA plots for outlier detection.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
## Dataset paths and free-text values are single-quoted so that spaces or
## shell metacharacters in them cannot split or inject arguments.
bland_altman_plot.py
--input '$input'
--design '$design'
--ID '$uniqID'
--figure '$ba_plots'
--flag_dist '$outlier_dist_plots'
--flag_sample '$flag_sample'
--flag_feature '$flag_feature'
--resid_cutoff $resid_cutoff
--sample_flag_cutoff $sample_cutoff
--feature_flag_cutoff $feature_cutoff
--prop_feature '$proportion_of_features'
--prop_sample '$proportion_of_samples'

## The group restriction is only passed when a group column was given;
## the list of groups to process is meaningless without it.
#if $group:
    --group '$group'

    #if $processOnly:
        --process_only '$processOnly'
    #end if
#end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab-separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <!-- Required by the command line, so an empty value is rejected up front. -->
        <param name="uniqID" size="30" type="text" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that contains unique feature identifiers.">
            <validator type="empty_field" message="A unique feature ID column name is required."/>
        </param>
        <param name="resid_cutoff" type="integer" size="30" value="3" label="Outlier Cutoff" help="Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value."/>
        <param name="sample_cutoff" type="float" size="30" value="0.2" min="0" max="1" label="Sample Flag Cutoff" help="Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1]."/>
        <param name="feature_cutoff" type="float" size="30" value="0.05" min="0" max="1" label="Feature Flag Cutoff" help="Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1]."/>
        <param name="group" type="text" size="30" value="" optional="true" label="Group/Treatment [Optional]" help="Name of the column in your Design File that contains group classifications."/>
        <param name="processOnly" size="30" type="text" value="" optional="true" label="Group Name [Optional]" help="Name of the group(s) that you want to process. Separate multiple group names with spaces (e.g. RC control treatment). Leave blank to process all groups. Requires the group parameter."/>
    </inputs>
    <outputs>
        <data format="pdf" name="ba_plots" label="${tool.name} on ${on_string}: BA plot"/>
        <data format="pdf" name="outlier_dist_plots" label="${tool.name} on ${on_string}: Distribution"/>
        <data format="tabular" name="flag_sample" label="${tool.name} on ${on_string}: Flag Sample"/>
        <data format="tabular" name="flag_feature" label="${tool.name} on ${on_string}: Flag Feature"/>
        <data format="tabular" name="proportion_of_features" label="${tool.name} on ${on_string}: Proportion of Feature"/>
        <data format="tabular" name="proportion_of_samples" label="${tool.name} on ${on_string}: Proportion of Samples"/>
    </outputs>
    <tests>
        <!-- Each output name below must match a name declared in the outputs section. -->
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index"/>
            <param name="group" value="White_wine_type_and_source"/>
            <!-- PDFs are compared by size only, within the given delta. -->
            <output name="ba_plots" file="ST000006_bland_altman_plot_with_group_figure.pdf" compare="sim_size" delta="10000"/>
            <output name="outlier_dist_plots" file="ST000006_bland_altman_plot_with_group_flag_distribution.pdf" compare="sim_size" delta="10000"/>
            <output name="flag_sample" file="ST000006_bland_altman_plot_with_group_flag_sample.tsv"/>
            <output name="flag_feature" file="ST000006_bland_altman_plot_with_group_flag_feature.tsv"/>
            <output name="proportion_of_features" file="ST000006_bland_altman_plot_with_group_proportion_feature.tsv"/>
            <output name="proportion_of_samples" file="ST000006_bland_altman_plot_with_group_proportion_sample.tsv"/>
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The Bland-Altman plot (BA-Plot) is used to look at the concordance of data between pairs of samples, particularly between replicates.
The script generates BA-plots for all pairwise combinations of samples.
If the Group/Treatment column and group name(s) in that column are provided then BA-Plots are generated only for pairwise combinations within the specified Group -- group name combination.
In addition to generating the BA-plots, a linear regression fit is calculated between the values that correspond to the pair of samples to identify (flag) any unusual outlying values.
The flags produced by the regression fit are used to generate distribution plots and text files for (i) each sample (column) and for (ii) each feature (row).


--------------------------------------------------------------------------------

**Input**

    - Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File
(below). Extra columns will automatically be ignored.

@METADATA@

@UNIQID@


**Outlier Cutoff – flagging values**

- Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value.

    (1) If the magnitude of the residuals from the linear regression on the BA-plot exceeds the user-defined threshold, then a value is flagged as an outlier. This cutoff can be adjusted by the user, the default is 3.

    (2) If a value is identified as a leverage point using Cook's D with a p-value cutoff of 0.5, then the value is flagged. This cannot be adjusted.

    (3) If a value is identified as a leverage point using the DFFITS technique it is also flagged. This cannot be adjusted.

**Sample Flag Cutoff – flagging samples**

    - Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1].

**Feature Flag Cutoff – flagging features**

    - Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1].

@GROUP@

**Group ID**

    - Name of the group(s) that you want to process. Separate multiple groupIDs with spaces. Leave blank to process all groups. Requires the group parameter.

--------------------------------------------------------------------------------

**Output**

This tool outputs four (or five) different files depending on the input settings:
(1) a PDF file containing BA-plots and scatterplots for each pair of samples
(2) a PDF file containing histograms of the most flagged features and samples
(3) two TSV files containing flags: one for samples and one for features
(4) if a grouping variable name is specified in the input, a TSV file containing flags for each group is also generated.
(5) two TSV files containing (i) the proportion of features flagged per sample and (ii) the proportion of samples flagged per feature.
If a sample (or feature) is flagged, the user should consider removing it from further analysis.

    ]]></help>
    <expand macro="citations"/>
</tool>