Mercurial > repos > malex > bayesase

diff calculate_priors_from_ase_count_tables.xml @ 0:e979cb57a5d5 draft default tip
"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author: malex
date: Thu, 14 Jan 2021 21:51:36 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/calculate_priors_from_ase_count_tables.xml	Thu Jan 14 21:51:36 2021 +0000
@@ -0,0 +1,139 @@
+<tool id="calculate_priors_from_ase_count_tables" name="Calculate Prior Probability Estimates" version="21.1.13">
+    <description>using ASE Count Tables</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command><![CDATA[
+    ## Chain the steps with && so a failed mkdir/cd aborts the job instead of running the script in the wrong directory.
+    mkdir outputs &&
+    cd outputs &&
+    calculate_priors_ase_count_tables.py
+    --output="`pwd`"
+    --design='$design'
+    --collection_identifiers="${",".join($collection.keys())}"
+    --collection_filenames="${",".join(map(str, $collection))}"
+]]></command>
+    <inputs>
+        <param name="design" type="data" format="tabular,tsv" label="Priors Design File" help="Select the Priors Design file [created by the Reformat Sample Design File Tool]"/>
+        <param name="collection" type="data_collection" collection_type="list" label="Select dataset collection for Prior Calculations" help="Datasets in this collection can be generated from simulated or DNA reads using the 'Align and Count' and 'Summarize counts' Workflows."/>
+    </inputs>
+    <outputs>
+        <collection name="split_output" type="list" label="${tool.name} on ${on_string}: Calculate Prior Probabilities">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" ext="tsv" directory="outputs"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="design" value="summarize_counts_testdata/sample_design_file.tabular" ftype="tsv"/>
+            <param name="collection">
+                <collection type="list">
+                    <element name="FEATURE_ID" value="summarize_counts_testdata/filtered_ASE_counts_tables_BASE"/>
+                </collection>
+            </param>
+            <output_collection name="split_output" type="list">
+                <element name="FEATURE_ID">
+                    <assert_contents>
+                        <has_text_matching expression="Calculated_priors"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Tool Description**
+
+The Calculate Prior Probability Estimates tool calculates prior probability estimates for each of the comparates.
+Prior probabilities are a part of Bayesian statistical inference, and are useful for determining whether alleles aligning better to one parent are due to allelic imbalance,
+or are really due to mapping biases or technical errors in the data.
+
+The output file contains three variables for each comparate: prior_[comparate name]_g1 and prior_[comparate name]_g2, which represent the probability that the feature will align to either parental genome, and prior_[comparate name]_both, which is the probability that the feature will align equally well to both genomes.
+
+
+Calculation::
+
+    prior_{comparate}_both = [# reads aligning equally to both genomes]        prior_{comparate}_{G1/G2} = 1 - prior_{comparate}_both
+                             ------------------------------------------                                    --------------------------
+                                   [total uniquely mapped reads]                                                       2
+
+
+
+
+--------------------------------------------------------------------------------------------------------------------
+
+**Inputs**
+
+**Priors Design File [REQUIRED]**
+
+This tool requires the Priors Design file, which can be created using the *Reformat Sample Design File* tool.
+
+The Priors Design File must be in the following format and contain the correct header order:
+
+1. G1 - the name of the paternal genome.
+2. G2 - the name of the maternal genome.
+3. comparate - the comparate conditions. Exclude the replicate number.
+
+
+
+Example of input design file::
+
+    +-------+------+-----------+
+    |   G1  |  G2  | comparate |
+    +-------+------+-----------+
+    | W1118 |  W55 | W55_M     |
+    +-------+------+-----------+
+    | W1118 |  W55 | W55_M     |
+    +-------+------+-----------+
+
+
+**Input for Priors Calculations - Summed ASE Counts Tables [REQUIRED]**
+
+The collection of datasets in Summarized and Filtered Format.
+These datasets can be generated by running simulated reads or DNA reads through the 'Align and Count' and 'Summarize Counts' Workflows.
+
+Example Input File::
+
+	FEATURE_ID	g1	g2	W55_M_flag_analyze     W55_M_num_reps   	W55_M_g1_total_rep1     	W55_M_g2_total_rep1     	W55_M_both_total_rep1       	W55_M_flag_apn_rep1     	W55_M_APN_total_reads_rep1	W55_M_APN_both_rep1	W55_M_g1_total_rep2     	W55_M_g2_total_rep2     	W55_M_both_total_rep2   	W55_M_flag_apn_rep2     	W55_M_APN_total_reads_rep2      	W55_M_APN_both_rep2
+	l(1)G0196	W1118	W55	1    			2			691				519				5020 	        		1   				29.073464805232			23.4243873865079		1075				812				7481				1			43.7266913990042		34.9168212437762
+	CG8920  	W1118	W55  	1			2			29				 62				 647    			1				10.3878993081113		9.10716914470779		38				126				920				1			15.2470189901369		12.9534815250994
+	CG10932 	W1118	W55 	1			2			163				122		                1112		    		1				89.9299663299663		71.5858585858586		237				134				1881				1			144.974410774411		121.086195286195
+
+
+
+----------------------------------------------------------------------------------------------------
+
+**Tool Outputs**
+
+- One TSV file containing the calculated prior probabilities for reads that align preferentially to G1, G2, and those that align equally to both.
+
+Example Priors Calculation File::
+
+    +--------------+------------------+-----------------+---------------+
+    | FEATURE_ID   | prior_W55_M_both | prior_W55_M_g1  |prior_W55_M_g2 |
+    +==============+==================+=================+===============+
+    | l(1)G0196    | 0.799907266902   | 0.1183611532625 | 0.08173157983 |
+    +--------------+------------------+-----------------+---------------+
+    | CG10932      | 0.853881278538   | 0.0597412480974 | 0.08637747336 |
+    +--------------+------------------+-----------------+---------------+
+    | CG8920       | 0.80895522388    | 0.1233830845771 | 0.06766169154 |
+    +--------------+------------------+-----------------+---------------+
+    | Mapmodulin   | 0.8987341772151  | 0.0855389336401 | 0.01572688914 |
+    +--------------+------------------+-----------------+---------------+
+
+Header descriptions::
+
+    prior_{comparate}_both: prior probability estimate for the total number of reads that mapped equally well to both updated parental genomes
+    prior_{comparate}_g1: prior probability estimate for the total number of reads that mapped to updated parental genome 1
+    prior_{comparate}_g2: prior probability estimate for the total number of reads that mapped to updated parental genome 2
+
+
+    ]]></help>
+    <citations>
+        <citation type="bibtex">@ARTICLE{Miller20BASE,
+            author = {Brecca Miller and Alison M. Morse and Elyse Borgert and Zihao Liu and Kelsey Sinclair and Gavin Gamble and Fei Zou and Jeremy Newman and Luis Leon Novello and Fabio Marroni and Lauren M. McIntyre},
+            title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
+            journal = {????},
+            year = {submitted for publication}
+            }</citation>
+    </citations>
+</tool>
author	malex
date	Thu, 14 Jan 2021 21:51:36 +0000
parents
children