diff check_for_lost_reads.xml @ 0:e979cb57a5d5 draft default tip

"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author malex
date Thu, 14 Jan 2021 21:51:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_for_lost_reads.xml	Thu Jan 14 21:51:36 2021 +0000
@@ -0,0 +1,91 @@
+<tool id="check_for_lost_reads" name="Check for lost reads" version="21.1.13">
+    <description>verify starting FASTQ read number equals read number after running BWASplitSAM tool</description>
+    <macros> <!-- shared definitions are imported from macros.xml -->
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/> <!-- presumably defined in macros.xml, which is not visible here; confirm -->
+    <command><![CDATA[
+    check_lost_reads.py
+    --alnSum1='$alnSum1'
+    --alnSum2='$alnSum2'
+    --fq='$fq'
+    --out='$out'
+]]></command> <!-- values are single-quoted so a path with spaces or shell metacharacters stays one argument -->
+    <inputs> <!-- the three datasets below are passed to check_lost_reads.py as alnSum1, alnSum2 and fq -->
+        <param name="alnSum1" type="data" format="tabular" label="BWASplitSAM Alignment Summary G1" help="The G1 alignment summary file [from BWASplitSAM tool] for updated genome1 containing all read types [Required]"/>
+        <param name="alnSum2" type="data" format="tabular" label="BWASplitSAM Alignment Summary G2" help="The G2 alignment summary file [from BWASplitSAM tool] for updated genome2 containing all read types [Required]"/>
+        <param name="fq" type="data" format="fastq" label="Name of the FASTQ file" help="Name of FASTQ file used to generate the alignments selected above."/>
+    </inputs>
+    <outputs> <!-- one tabular report; its path is handed to the script as $out -->
+        <data name="out" format="tabular" label="${tool.name} on ${on_string}: Check start readNum = alignment readNum"/>
+    </outputs>
+    <tests>
+        <test> <!-- each ftype matches the format declared by the corresponding input param and output -->
+            <param name="alnSum1" ftype="tabular" value="align_and_counts_test_data/W1118_G1_BWASplitSAM_summary.tabular"/>
+            <param name="alnSum2" ftype="tabular" value="align_and_counts_test_data/W55_G2_BWASplitSAM_summary.tabular"/>
+            <param name="fq"      ftype="fastq"   value="align_and_counts_test_data/W55_M_1_1.fastq"/>
+            <output name="out" ftype="tabular" file="align_and_counts_test_data/check_for_lost_reads_BASE_test_data.tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+**Tool Description**
+
+This tool checks that all reads in the starting FASTQ file are accounted for in the G1 and G2 SAM files after running the BWASplitSAM tool.
+The read counts in the input FASTQ file are compared to the 'count_total_reads' column in the summary of aligned reads TSV files generated by the BWASplitSAM tool.
+
+
+**Input**
+The tool requires three input files:
+
+	(1) The output summary TSV file generated from the BWASplitSAM tool for the updated genome1 (G1) SAM file
+	(2) The output summary TSV file generated from the BWASplitSAM tool for the updated genome2 (G2) SAM file
+	(3) The FASTQ file used to generate the above G1 and G2 SAM files - used to calculate the number of starting reads
+
+Example summary TSV file from BWASplitSAM script:
+
+    +---------------+---------------------+---------------------------------------+---------------------+---------------------+----------------------+---------------------+-----------------+
+    |   Name        |  count_total_reads  | count_mapped_read_opposite_strand     | count_unmapped_read |  count_mapped_read  | count_ambiguous_read |count_chimeric_read  | count_notprimary|
+    +===============+=====================+=======================================+=====================+=====================+======================+=====================+=================+
+    | dataset_2216  |  14                 |   5                                   |    0                |    9                |0                     |                 0   |    0            |
+    +---------------+---------------------+---------------------------------------+---------------------+---------------------+----------------------+---------------------+-----------------+
+
+
+**Output**
+
+	 A TSV file containing:
+		(1) starting read counts in the FASTQ file [start_read_num]
+		(2) read counts in the G1 alignment [readNum_G1]
+		(3) read counts in the G2 alignment [readNum_G2]
+		(4) indicator flag for whether the starting count = G1 count [flag_start_readNum_eq_readNum_G1]
+		(5) indicator flag for whether the starting count = G2 count [flag_start_readNum_eq_readNum_G2]
+
+Sample Output TSV file
+
+    +---------------+---------------------+---------------+------------+------------------------------------+------------------------------------+
+    |   fqName      |  start_read_num     |    readNum_G1 | readNum_G2 |  flag_start_readNum_eq_readNum_G1  | flag_start_readNum_eq_readNum_G2   |
+    +===============+=====================+===============+============+====================================+====================================+
+    | dataset_2216  |  14                 |   14          |    14      |    1                               |1                                   |
+    +---------------+---------------------+---------------+------------+------------------------------------+------------------------------------+
+
+Columns are::
+
+        ◦ fqName
+        ◦ start_read_num: The total number of reads in the FASTQ file
+        ◦ readNum_G1: The total number of reads in the summary TSV file output from BWASplitSAM for updated parental genome 1 (G1)
+        ◦ readNum_G2: The number of reads found in the summary TSV file output from BWASplitSAM for updated parental genome 2 (G2)
+        ◦ flag_start_readNum_eq_readNum_{G1/G2}: 0/1 indicator flag where “1” means that the number of reads in the FASTQ file matches the total read number in the G1 or G2 BWASplitSAM summary file.
+
+In the above example, flag_start_readNum_eq_readNum_G1 and flag_start_readNum_eq_readNum_G2 are both 1, indicating all reads are accounted for.
+
+The BayesASE align and count workflow should be rerun if flag_start_readNum_eq_readNum_{G1/G2} is 0.
+
+    ]]></help>
+    <citations> <!-- BibTeX separates authors with "and"; a comma-separated list is parsed as a single name -->
+        <citation type="bibtex">@ARTICLE{Miller20BASE,
+            author = {Brecca Miller and Alison M. Morse and Elyse Borgert and Zihao Liu and Kelsey Sinclair and Gavin Gamble and Fei Zou and Jeremy Newman and Luis Leon Novello and Fabio Marroni and Lauren M. McIntyre},
+            title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
+            journal = {????},
+            year = {submitted for publication}
+            }</citation>
+    </citations>
+</tool>