comparison summarize_sam_compare_cnts_table_1cond.xml @ 0:e979cb57a5d5 draft default tip

"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author malex
date Thu, 14 Jan 2021 21:51:36 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e979cb57a5d5
1 <tool id="summarize_sam_compare_cnts_table_1cond" name="Summarize and Filter ASE Count Tables" version="21.1.13">
2 <description>based on user-defined APN threshold</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <!-- Runs the summarizer from inside ./outputs, the directory that discover_datasets scans. Steps are chained with && so a failed mkdir/cd stops the job, and every substituted value is quoted so spaces or shell metacharacters cannot split or alter the command line. --><command><![CDATA[
8 mkdir -p outputs &&
9 cd outputs &&
10 summarize_sam_compare_cnts_table_1cond.py
11 --design='$design'
12 --collection_identifiers='${",".join($collection.keys())}'
13 --collection_filenames='${",".join(map(str, $collection))}'
14 --parent1='$parent1'
15 --parent2='$parent2'
16 --sampleCol='$sampleCol'
17 --sampleIDCol='$sampleIDCol'
18 --apn='$apn'
19 --out="`pwd`"
20 ]]></command>
21 <inputs> <!-- Design file, collection of count tables, the four design-file column names, and the APN cutoff. Attributes are ordered name, type, format/collection_type, value, label, help. -->
22 <param name="design" type="data" format="tabular,tsv" label="Sample Design file" help="Select the Sample Design File - this can be created with the Combine ASE Counts Tables tool."/>
23 <param name="collection" type="data_collection" collection_type="list" label="Collection of Combined ASE Count Tables" help="Select the collection containing ASE Count Tables combined (summed) across technical replicates."/>
24 <param name="parent1" type="text" value="G1" label="Updated genome 1 (G1)" help="Enter the name of the column in the design file for genome 1 (e.g. G1)"/>
25 <param name="parent2" type="text" value="G2" label="Updated genome 2 (G2)" help="Enter the name of the column in the design file for genome 2 (e.g. G2)"/>
26 <param name="sampleIDCol" type="text" value="sampleID" label="Sample ID Column" help="Enter the name of the column in the design file containing sampleIDs"/>
27 <param name="sampleCol" type="text" value="comparate" label="Comparate Column" help="Enter the header name of the column in design file containing comparate names"/>
28 <param name="apn" type="text" value="5" label="APN (Average Reads per Nucleotide) threshold" help="Enter an APN threshold value for flagging a feature as 'expressed'. The default setting is 5."/>
29 </inputs>
30 <outputs> <!-- One list collection, filled after the job from files in the outputs directory whose names match the pattern below (names beginning with "ase"). -->
31 <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Summarized and Filtered ASE Counts table">
32 <discover_datasets directory="outputs" pattern="(?P&lt;name&gt;ase.*)" ext="tabular" sort_by="reverse_filename"/>
33 </collection>
34 </outputs>
35 <tests> <!-- Test param names must match the name attributes declared in the inputs section exactly; they are case-sensitive (sampleIDCol, sampleCol). -->
36 <test>
37 <param name="design" value="BASE_testdata/summarize_counts_testdata/summarization_df_BASE.tabular" ftype="tabular"/>
38 <param name="collection" value="BASE_testdata/summarize_counts_testdata/combined_ASE_counts_tables_BASE" ftype="data_collection"/>
39 <param name="parent1" value="G1" ftype="text"/>
40 <param name="parent2" value="G2" ftype="text"/>
41 <param name="sampleIDCol" value="sampleID" ftype="text"/>
42 <param name="sampleCol" value="comparate" ftype="text"/>
43 <param name="apn" value="1" ftype="text"/>
44 <output_collection name="split_output" type="list">
45 <element name="FEATURE_ID">
46 <assert_contents>
47 <has_text_matching expression="Summarize_ASE_counts_test_data"/>
48 </assert_contents>
49 </element>
50 </output_collection>
51 </test>
52 </tests>
53 <help><![CDATA[
54 **Tool Description**
55
56 The Summarize SAM Compare Counts tool filters data based on whether or not they meet the requirements for input into the Bayesian module to test for allele-specific expression.
57 Using the Sample Design File, it creates one file with all the biological replicates for a given comparate.
58 The data are then filtered, removing features that don't meet a user-defined APN (average number of reads per nucleotide) threshold, which indicates whether or not a feature is expressed by having high read coverage.
59
60 The default APN threshold value is 5. Features that do not meet this cutoff are considered to have insufficient information needed to estimate model parameters.
61
62 For each feature, if at least 1 replicate in a comparate has an APN value greater than the user-specified cutoff value then the binary indicator flag in the output (flag_analyze) will be equal to 1, else equal to 0.
63
64
65 **INPUTS**
66
67 **Sample Design File [REQUIRED]**
68
69 The design file must contain the sampleIDs for the summed count table. This sampleID must contain the biological replicate number that the summed ASE Counts table are aggregates of.
70
71 The comparate column contains the names of the comparates and their conditions. It can contain the same information as the sampleID but excludes the biological replicate number.
72
73 **TIP**: The Sample Design file can be created using the *Combine Counts Table* tool.
74
75 An example Sample Design File:
76
77 G1 G2 sampleID comparate
78 W1118 W55 W55_M_rep1 W55_M
79 W1118 W55 W55_M_rep2 W55_M
80 W1118 W55 W55_V_rep1 W55_V
81 W1118 W55 W55_V_rep2 W55_V
82
83
84 **Collection of Summed ASE Counts Tables [REQUIRED]**
85
86 Input the collection of summed ASE counts table created by the Combine Counts Tables tool.
87
88 Example input ASE Counts Table:
89
90 +------------+-------------------+-------------------+------------+------------------+----------------+----------------+--------------------------+-------------------------+-------------------------+--------------------------+--------------------+--------------------+
91 |Feature_ID |APN_both |APN_total_reads | BOTH_EXACT |BOTH_INEXACT_EQUAL|SAM_A_ONLY_EXACT|SAM_B_ONLY_EXACT| SAM_A_EXACT_SAM_B_INEXACT|SAM_B_EXACT_SAM_A_INEXACT|SAM_A_ONLY_SINGLE_INEXACT|SAM_B_ONLY_SINGLE_INEXACT |SAM_A_INEXACT_BETTER|SAM_B_INEXACT_BETTER|
92 +============+===================+===================+============+==================+================+================+==========================+=========================+=========================+==========================+====================+====================+
93 | l(1)G0196 |10.255101044615834 |12.723420872791175 | 721 |1476 |120 |173 |0 | 2 |96 |136 |0 |2 |
94 +------------+-------------------+-------------------+------------+------------------+----------------+----------------+--------------------------+-------------------------+-------------------------+--------------------------+--------------------+--------------------+
95 | CG8920 |7.0372442219932285 |8.62888267334020 | 207 |293 |31 |62 |0 | 0 |8 |12 |0 |0 |
96 +------------+-------------------+-------------------+------------+------------------+----------------+----------------+--------------------------+-------------------------+-------------------------+--------------------------+--------------------+--------------------+
97
98
99 **Header names [REQUIRED]**
100 Type in the designated names of the following columns in the design file:
101
102 (1) Genome 1 - the name of the column containing updated genome 1 (eg G1)
103 (2) Genome 2 - the name of the column containing updated genome 2 (eg G2)
104 (3) SampleID Column - the name of the column containing the sampleIDs
105 (4) Comparate Column - the name of the column containing comparate information
106
107 **APN Threshold [REQUIRED]**
108
109 Specify an APN threshold for flagging features as 'expressed'. Features that do not meet this threshold are considered to have insufficient coverage and will not be included in the Bayesian analysis.
110
111 **NOTE**: The default setting is 5.
112
113
114 **OUTPUTS**
115
116
117 **This tool outputs the following:**
118
119 (1) For each comparate, a summary TSV file containing the flagged indicators recording whether or
120 not a feature meets the specified APN threshold. In the below column header descriptions,
121 {comparate} refers to the comparate in the Sample Design File (e.g. W55_M). The number of columns
122 generated is dependent on the number of replicates.
123
124 The first three rows of an example output file:
125
126 FEATURE_ID g1 g2 W55_M_flag_analyze W55_M_num_reps W55_M_g1_total_rep1 W55_M_g2_total_rep1 W55_M_both_total_rep1 W55_M_flag_apn_rep1 W55_M_APN_total_reads_rep1 W55_M_APN_both_rep1 W55_M_g1_total_rep2 W55_M_g2_total_rep2 W55_M_both_total_rep2 W55_M_flag_apn_rep2 W55_M_APN_total_reads_rep2 W55_M_APN_both_rep2
127 l(1)G0196 W1118 W55 0 2 0 0 3 0 0.253164557 0.253164557 0 0 0 0 0 0
128 CG8920 W1118 W55 0 2 0 0 2 0 0.660066007 0.660066007 0 0 0 0 0 0
129 CG10932 W1118 W55 0 2 0 0 0 0 0 0 0 0 0 0 0 0
130
131 In the example output above, the features have low read counts and they do not surpass the default APN threshold of 5 (APN threshold can be changed by user), therefore the flag_analyze variable=0, and the features would be excluded from Bayesian Analysis.
132
133 Column header definitions::
134
135 ◦ {comparate}_flag_analyze: 0/1 binary indicator flag where a “1” means that at least one replicate for the indicated comparate has an APN greater than the user-specified cutoff value.
136 ◦ {comparate}_num_reps: The number of replicates for the indicated comparate.
137 ◦ {comparate}_g1_total_{replicate_number}: Total number of unique reads from a given replicate that mapped to updated parental genome 1
138 ◦ {comparate}_g2_total_{replicate_number}: Total number of unique reads from a given replicate that mapped to updated parental genome 2
139 ◦ {comparate}_both_total_{replicate_number}: Total number of unique reads from a given replicate that mapped equally well to both updated parental genomes
140 ◦ {comparate}_flag_apn_{replicate_number}: 0/1 flag where a “1” indicates that the APN value for a given feature is above the user-defined APN threshold
141 ◦ {comparate}_APN_total_reads_{replicate_number}: The calculated APN value for the total number of unique reads that mapped to a given feature
142 ◦ {comparate}_APN_both_{replicate_number}: The calculated APN value for the number of unique reads that mapped equally well to both updated parental genomes for a given feature
143
144
145 ]]></help>
146 <citations> <!-- BibTeX separates individual authors with the word "and"; commas are reserved for the "Last, First" name form. -->
147 <citation type="bibtex">@ARTICLE{Miller20BASE,
148 author = {Brecca Miller and Alison M. Morse and Elyse Borgert and Zihao Liu and Kelsey Sinclair and Gavin Gamble and Fei Zou and Jeremy Newman and Luis Leon Novello and Fabio Marroni and Lauren M. McIntyre},
149 title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
150 journal = {????},
151 year = {submitted for publication}
152 }</citation>
153 </citations>
154 </tool>
155