diff bland_altman_plot.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bland_altman_plot.xml	Mon Mar 08 22:04:06 2021 +0000
@@ -0,0 +1,129 @@
+<tool id="secimtools_bland_altman_plot" name="Bland-Altman (BA) Plot" version="@WRAPPER_VERSION@">
+    <description>- Create pairwise BA plots for outlier detection.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+bland_altman_plot.py
+--input $input
+--design $design
+--ID $uniqID
+--figure $ba_plots
+--flag_dist $outlier_dist_plots
+--flag_sample $flag_sample
+--flag_feature $flag_feature
+--resid_cutoff $resid_cutoff
+--sample_flag_cutoff $sample_cutoff
+--feature_flag_cutoff $feature_cutoff
+--prop_feature $proportion_of_features
+--prop_sample $proportion_of_samples
+
+#if $group
+    --group $group
+
+    #if $processOnly:
+        --process_only "$processOnly"
+    #end if
+#end if
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab-separated see TIP below."/>
+        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
+        <param name="uniqID" size="30" type="text" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that contains unique feature identifiers."/>
+        <param name="resid_cutoff" type="integer" size="30" value="3" label="Outlier Cutoff" help="Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value."/>
+        <param name="sample_cutoff" type="float" size="30" value="0.2" min="0" max="1" label="Sample Flag Cutoff" help="Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1]."/>
+        <param name="feature_cutoff" type="float" size="30" value="0.05" min="0" max="1" label="Feature Flag Cutoff" help="Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1]."/>
+        <param name="group" type="text" size="30"  value="" optional="true" label="Group/Treatment [Optional]" help="Name of the column in your Design File that contains group classifications."/>
+        <param name="processOnly" size="30" type="text" value="" optional="true" label="Group Name [Optional]" help="Name of the group(s) that you want to process. Separate multiple group names with spaces (e.g. RC,control,treatment). Leave blank to process all groups. Requires the group parameter."/>
+    </inputs>
+    <outputs>
+        <data format="pdf" name="ba_plots" label= "${tool.name} on ${on_string}: BA plot"  />
+        <data format="pdf" name="outlier_dist_plots" label= "${tool.name} on ${on_string}: Distribution"/>
+        <data format="tabular" name="flag_sample" label= "${tool.name} on ${on_string}: Flag Sample"/>
+        <data format="tabular" name="flag_feature" label= "${tool.name} on ${on_string}: Flag Feature"/>
+        <data format="tabular" name="proportion_of_features" label= "${tool.name} on ${on_string}: Proportion of Feature"/>
+        <data format="tabular" name="proportion_of_samples" label= "${tool.name} on ${on_string}: Proportion of Samples"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input"  value="ST000006_data.tsv"/>
+            <param name="design" value="ST000006_design.tsv"/>
+            <param name="uniqID" value="Retention_Index" />
+            <param name="group"  value="White_wine_type_and_source" />
+            <output name="ba_plots"           file="ST000006_bland_altman_plot_with_group_figure.pdf" compare="sim_size" delta="10000" />
+            <output name="outlier_dist_plots" file="ST000006_bland_altman_plot_with_group_flag_distribution.pdf" compare="sim_size" delta="10000" />
+            <output name="flag_sample"        file="ST000006_bland_altman_plot_with_group_flag_sample.tsv" />
+            <output name="flag_feature"       file="ST000006_bland_altman_plot_with_group_flag_feature.tsv" />
+            <output name="prop_feature"       file="ST000006_bland_altman_plot_with_group_proportion_feature.tsv" />
+            <output name="prop_sample"        file="ST000006_bland_altman_plot_with_group_proportion_sample.tsv" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+@TIP_AND_WARNING@
+
+**Tool Description**
+
+The Bland-Altman plot (BA-Plot) is used to look at the concordance of data between pairs of samples, particularly between replicates.
+The script generates BA-plots for all pairwise combinations of samples.
+If the Group/Treatment column and group name(s) in that column are provided then BA-Plots are generated only for pairwise combinations within the specified Group -- group name combination.
+In addition to generating the BA-plots, a linear regression fit is calculated between the values that correspond to the pair of samples to identify (flag) any unusual outlying values.
+The flags produced by the regression fit are used to generate distribution plots and text files for (i) each sample (column) and for (ii) each feature (row).
+
+
+--------------------------------------------------------------------------------
+
+**Input**
+
+    - Two input datasets are required.
+
+@WIDE@
+
+**NOTE:** The sample IDs must match the sample IDs in the Design File
+(below). Extra columns will automatically be ignored.
+
+@METADATA@
+
+@UNIQID@
+
+
+**Outlier Cutoff – flagging values**
+
+- Residual cutoff value, this value will flag samples with residuals ≥ than this cutoff value.
+
+        (1) If the magnitude of the residuals from the linear regression on the BA-plot exceeds the user-defined threshold, then a value is flagged as an outlier. This cutoff can be adjusted by the user, the default is 3.
+
+        (2) If a value is identified as a leverage point using Cook's D with a p-value cutoff of 0.5, then the value is flagged. This cannot be adjusted.
+
+        (3) If a value is identified as a leverage point using the DFFITS technique it is also flagged. This cannot be adjusted.
+
+**Sample Flag Cutoff – flagging samples**
+
+    - Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1].
+
+**Feature Flag Cutoff  – flagging features**
+
+    - Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1].
+
+@GROUP@
+
+**Group ID**
+
+    - Name of the group(s) that you want to process. Separate multiple groupIDs with spaces. Leave blank to process all groups. Requires the group parameter.
+
+--------------------------------------------------------------------------------
+
+**Output**
+
+This tool outputs four (or five) different files depending on the input settings:
+(1) a PDF file containing BA-plots and scatterplots for each pair of samples
+(2) a PDF file containing histograms of the most flagged features and samples
+(3) two TSV files containing flags: one for samples and one for features
+(4) if a grouping variable name is specified in the input, a TSV file containing flags for each group is also generated.
+(5) two TSV files containing (i) the proportion of features flagged per sample and (ii) the proportion of samples flagged per feature.
+If a sample (or feature) is flagged, the user should consider removing it from further analysis.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>