diff check_aln_design_file.xml @ 0:e979cb57a5d5 draft default tip

"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author malex
date Thu, 14 Jan 2021 21:51:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_aln_design_file.xml	Thu Jan 14 21:51:36 2021 +0000
@@ -0,0 +1,87 @@
+<tool id="base_check_alignment_design_file" name="Check Alignment Design File" version="21.1.13">
+    <description>for correct formatting and duplicate FASTQ names </description>
+    <macros>
+      <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command><![CDATA[
+    check_aln_design_file.py
+    --design=$design
+    --logfile=$logfile
+    --dups=$dups
+]]></command>
+    <inputs>
+        <param name="design" type="data" format="tabular,tsv" label="Alignment Design file" help="Design file containing FASTQ file names, sampleIDs, etc. Refer to the help section at the bottom of this page for the format. [Required]"/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="dups" label="${tool.name} on ${on_string}: FASTQ Duplicates"/>
+        <data format="tabular" name="logfile" label="${tool.name} on ${on_string}: Alignment Design Criteria"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="design" ftype="data"     value="align_and_counts_test_data/alignment_design_file.tsv"/>
+            <output name="dups"  ftype="data" file="align_and_counts_test_data/alignment_design_file_duplicates.tabular" />
+            <output name="logfile"  ftype="data" file="align_and_counts_test_data/alignment_design_file_criteria.csv" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**Tool Description**
+
+This tool checks to make sure the Alignment design file is formatted correctly and has all needed headers. Also verifies that there are no duplicate FastQ file names.
+Default values are already in place to save user time.
+
+**NOTE:** There are two Design Files that must be created and supplied by the user. They are the *Alignment Design File* and the *Comparate Design File*.
+
+All others (Sample and Priors) are created and used as intermediates by the BASE workflows.
+
+**The design file should contain the following columns, in order:**
+
+	(1) G1 - name of parental genome 1 for alignment
+	(2) G2 - name of parental genome 2 for alignment
+	(3) sampleID - sample identifier (no spaces)
+	(4) fqName - name of the column containing the FASTQ file names, WITHOUT the extension
+	(5) fqExtension - FASTQ file extension, for example, .fq or .fastq (NOT gzipped)
+	(6) techRep - name of the column containing the technical replicates for each sampleID, for example, the same library run on different lanes.
+	(7) readLength - the read length in base pairs
+
+The sample identifier must contain the biological replicate number, and the comparate conditions to be tested in the Bayesian Model for Allelic Imbalance.
+
+An example of a comparate condition is W1118_F, where W1118 is the genome and F refers to female. There must be at least two comparate conditions for Bayesian Analysis.
+
+In the example below, there are two comparate condtions, W55_Mated and W55_Virgin, and E1 refers to the biological replicate number.
+
+An example design file::
+
+	G1	G2	sampleID		fqName			fqExtension	techRep	readLength
+	W1118	W55	mel_W55_Mated_E1	mel_W55_Mated_E1_R1	.fq		1	150
+	W1118	W55	mel_W55_Mated_E1	mel_W55_Mated_E1_R2	.fq		2	150
+	W1118	W55	mel_W55_Mated_E1	mel_W55_Mated_E1_R3	.fq		3	150
+	W1118	W55	mel_W55_Virgin_E1	mel_W55_Virgin_E1_R1	.fq		1	150
+	W1118	W55	mel_W55_Virgin_E1	mel_W55_Virgin_E1_R2	.fq		2	150
+	W1118	W55	mel_W55_Virgin_E1	mel_W55_Virgin_E1_R3	.fq		3	150
+
+If using simulated reads, include the technical replicate column, but label the technical replicates with the same number
+
+Example design file for simulated data ::
+
+	G1	G2      sampleID   fqName	fqExtension	techRep	readLength
+	W1118	W55      W55_M_1   SRR1989586_1     .fq		1	96
+	W1118	W55      W55_M_2   SRR1989588_1     .fq		1	96
+	W1118	W55      W55_V_1   SRR1989592_1     .fq		1	96
+	W1118	W55      W55_V_2   SRR1989594_1     .fq		1	96
+
+**Outputs**
+
+	(1) a logfile with information about whether the column names are correct and if there are any duplicated FASTQ file names.
+	(2) a text file containing a list of any duplicated FASTQ file names, if present.
+
+    ]]></help>
+    <citations>
+            <citation type="bibtex">@ARTICLE{Miller20BASE,
+            author = {Brecca Miller, Alison M. Morse, Elyse Borgert, Zihao Liu, Kelsey Sinclair, Gavin Gamble, Fei Zou, Jeremy Newman, Luis Leon Novello, Fabio Marroni, Lauren M. McIntyre},
+            title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
+            journal = {????},
+            year = {submitted for publication}
+            }</citation>
+        </citations>
+</tool>