comparison bland_altman_plot.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_bland_altman_plot" name="Bland-Altman (BA) Plot" version="@WRAPPER_VERSION@">
2 <description>- Create pairwise BA plots for outlier detection.</description>
3 <!-- Shared definitions are imported from macros.xml; presumably this is where the @...@ tokens and the macros expanded in this file are defined (confirm against macros.xml). --> <macros>
4 <import>macros.xml</import>
5 </macros>
6 <!-- Expands the macro named "requirements" at this position. --> <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 bland_altman_plot.py
9 --input '$input'
10 --design '$design'
11 --ID '$uniqID'
12 --figure '$ba_plots'
13 --flag_dist '$outlier_dist_plots'
14 --flag_sample '$flag_sample'
15 --flag_feature '$flag_feature'
16 --resid_cutoff $resid_cutoff
17 --sample_flag_cutoff $sample_cutoff
18 --feature_flag_cutoff $feature_cutoff
19 --prop_feature '$proportion_of_features'
20 --prop_sample '$proportion_of_samples'
21 ## Paths and free-text values above are single-quoted so a value containing spaces stays one argument. The grouping options below are only emitted when a group column is given.
22 #if $group:
23 --group '$group'
24 ## The group-name filter is nested here because it is only meaningful together with the group column.
25 #if $processOnly:
26 --process_only "$processOnly"
27 #end if
28 #end if
29 ]]></command>
30 <!-- User-facing parameters; each name below is referenced as $name in the command template. --> <inputs>
31 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab-separated see TIP below." />
32 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below." />
33 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that contains unique feature identifiers." />
34 <param name="resid_cutoff" type="integer" size="30" value="3" label="Outlier Cutoff" help="Residual cutoff value, this value will flag samples with residuals ≥ this cutoff value." />
35 <param name="sample_cutoff" type="float" size="30" value="0.2" min="0" max="1" label="Sample Flag Cutoff" help="Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1]." />
36 <param name="feature_cutoff" type="float" size="30" value="0.05" min="0" max="1" label="Feature Flag Cutoff" help="Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1]." />
37 <param name="group" type="text" size="30" value="" optional="true" label="Group/Treatment [Optional]" help="Name of the column in your Design File that contains group classifications." />
38 <param name="processOnly" type="text" size="30" value="" optional="true" label="Group Name [Optional]" help="Name of the group(s) that you want to process. Separate multiple group names with spaces (e.g. RC,control,treatment). Leave blank to process all groups. Requires the group parameter." />
39 </inputs>
40 <!-- Datasets produced by the tool; each name is passed to the script as an output path in the command template. --> <outputs>
41 <data name="ba_plots" format="pdf" label="${tool.name} on ${on_string}: BA plot" />
42 <data name="outlier_dist_plots" format="pdf" label="${tool.name} on ${on_string}: Distribution" />
43 <data name="flag_sample" format="tabular" label="${tool.name} on ${on_string}: Flag Sample" />
44 <data name="flag_feature" format="tabular" label="${tool.name} on ${on_string}: Flag Feature" />
45 <data name="proportion_of_features" format="tabular" label="${tool.name} on ${on_string}: Proportion of Feature" />
46 <data name="proportion_of_samples" format="tabular" label="${tool.name} on ${on_string}: Proportion of Samples" />
47 </outputs>
48 <!-- Functional test: grouped run on the ST000006 example data. Every output name must match a data name declared in the outputs section. --> <tests>
49 <test>
50 <param name="input" value="ST000006_data.tsv"/>
51 <param name="design" value="ST000006_design.tsv"/>
52 <param name="uniqID" value="Retention_Index" />
53 <param name="group" value="White_wine_type_and_source" />
54 <output name="ba_plots" file="ST000006_bland_altman_plot_with_group_figure.pdf" compare="sim_size" delta="10000" />
55 <output name="outlier_dist_plots" file="ST000006_bland_altman_plot_with_group_flag_distribution.pdf" compare="sim_size" delta="10000" />
56 <output name="flag_sample" file="ST000006_bland_altman_plot_with_group_flag_sample.tsv" />
57 <output name="flag_feature" file="ST000006_bland_altman_plot_with_group_flag_feature.tsv" />
58 <output name="proportion_of_features" file="ST000006_bland_altman_plot_with_group_proportion_feature.tsv" />
59 <output name="proportion_of_samples" file="ST000006_bland_altman_plot_with_group_proportion_sample.tsv" />
60 </test>
61 </tests>
62 <help><![CDATA[
63
64 @TIP_AND_WARNING@
65
66 **Tool Description**
67
68 The Bland-Altman plot (BA-Plot) is used to look at the concordance of data between pairs of samples, particularly between replicates.
69 The script generates BA-plots for all pairwise combinations of samples.
70 If the Group/Treatment column and the group name(s) in that column are provided, then BA-plots are generated only for pairwise combinations of samples within the specified group(s).
71 In addition to generating the BA-plots, a linear regression fit is calculated between the values that correspond to the pair of samples to identify (flag) any unusual outlying values.
72 The flags produced by the regression fit are used to generate distribution plots and text files for (i) each sample (column) and for (ii) each feature (row).
73
74
75 --------------------------------------------------------------------------------
76
77 **Input**
78
79 - Two input datasets are required.
80
81 @WIDE@
82
83 **NOTE:** The sample IDs must match the sample IDs in the Design File
84 (below). Extra columns will automatically be ignored.
85
86 @METADATA@
87
88 @UNIQID@
89
90
91 **Outlier Cutoff – flagging values**
92
93 - Residual cutoff value: samples with residuals ≥ this cutoff value are flagged.
94
95 (1) If the magnitude of the residuals from the linear regression on the BA-plot exceeds the user-defined threshold, then a value is flagged as an outlier. This cutoff can be adjusted by the user; the default is 3.
96
97 (2) If a value is identified as a leverage point using Cook's D with a p-value cutoff of 0.5, then the value is flagged. This cannot be adjusted.
98
99 (3) If a value is identified as a leverage point using the DFFITS technique it is also flagged. This cannot be adjusted.
100
101 **Sample Flag Cutoff – flagging samples**
102
103 - Flag a sample as 1 if the proportion of features within a sample that are outliers exceeds this cutoff. [Number between 0-1].
104
105 **Feature Flag Cutoff – flagging features**
106
107 - Flag a feature as 1 if the proportion of times this feature was identified as an outlier exceeds this cutoff. [Number between 0-1].
108
109 @GROUP@
110
111 **Group Name**
112
113 - Name of the group(s) that you want to process. Separate multiple group names with spaces. Leave blank to process all groups. Requires the group parameter.
114
115 --------------------------------------------------------------------------------
116
117 **Output**
118
119 This tool outputs the following files, depending on the input settings:
120 (1) a PDF file containing BA-plots and scatterplots for each pair of samples
121 (2) a PDF file containing histograms of the most flagged features and samples
122 (3) two TSV files containing flags: one for samples and one for features
123 (4) if a grouping variable name is specified in the input, a TSV file containing flags for each group is also generated.
124 (5) two TSV files containing (i) the proportion of features flagged per sample and (ii) the proportion of samples flagged per feature.
125 If a sample (or feature) is flagged, the user should consider removing it from further analysis.
126
127 ]]></help>
128 <expand macro="citations"/>
129 </tool>