Mercurial > repos > mvdbeek > damidseq_core

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/damidseq_core.xml	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,118 @@
+<tool id="damidseq_core" name="damidseq" version="0.1.0">
+    <description>align, extend and normalize a DAMID-seq experiment</description>
+    <requirements>
+        <requirement type="package" version="1.4">damidseq_pipeline</requirement>
+    </requirements>
+    <version_command><![CDATA[damidseq_pipeline --help 2>&1| grep damidseq_pipeline]]></version_command>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Symlink the two read files and the generated index configfile into the working directory.
+        export HOME="\$PWD" &&
+        ln -f -s '$dam' A001.$dam.ext &&
+        ln -f -s '$dam_fusion' A002.$dam_fusion.ext &&
+        ln -f -s '$index' index.txt &&
+        damidseq_pipeline
+        --bins=$bins
+        --bowtie=1
+        --bowtie2_genome_dir='$reference_index.fields.path'
+        --extend_reads=$extend_reads
+        --extension_method='$extension_method'
+        $full_data_files
+        --gatc_frag_file='$gatc_frag_file'
+        --len=$len
+        --max_norm_value='$max_norm_value'
+        $method_subtract
+        --min_norm_value='$min_norm_value'
+        --norm_method=$norm_method
+        --norm_steps=$norm_steps
+        --output_format=$output_format
+        --q=$q
+        --qscore1max=$qscore1max
+        --qscore1min=$qscore1min
+        --qscore2max=$qscore2max
+        --threads=\${GALAXY_SLOTS:-4} &&
+        mv Fusion-vs-Dam.*.$output_format fusion.output
+        ## The outputs section collects Dam-ext300.bam / Fusion-ext300.bam, but the
+        ## pipeline names extended BAMs after --len (<sample>-ext<len>.bam). Rename
+        ## them when a non-default length is used so the outputs are still found.
+        ## NOTE(review): BAM names produced with extend_reads=0 are not handled here - confirm.
+        #if str($extend_reads) == "1" and str($len) != "300"
+            && mv 'Dam-ext${len}.bam' Dam-ext300.bam
+            && mv 'Fusion-ext${len}.bam' Fusion-ext300.bam
+        #end if
+    ]]></command>
+    <configfiles>
+        <configfile name="index">A1	Dam
+A2	Fusion</configfile>
+    </configfiles>
+    <inputs>
+        <!-- Raw reads for the two samples; alignment is performed by the tool itself. -->
+        <param argument="--dam" type="data" format="fastq,fastq.gz" label="Control Dam-only reads (FASTQ)"/>
+        <param name="dam_fusion" type="data" format="fastq,fastq.gz" label="Dam-fusion reads (FASTQ)"/>
+        <param name="reference_index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+          <options from_data_table="bowtie2_indexes">
+            <filter type="sort_by" column="2"/>
+            <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+          </options>
+        </param>
+        <param argument="--gatc_frag_file" type="data" format="gff" label="GFF file with all GATC locations"/>
+        <param name="output_format" type="select" label="Select the output format for the peaks">
+            <option value="bedgraph">Bedgraph</option>
+            <option value="gff">GFF</option>
+        </param>
+        <param argument="--extend_reads" type="boolean" truevalue="1" falsevalue="0" checked="True" label="Perform read extension?"/>
+        <param argument="--extension_method" type="select" label="Select the read extension method" help="Select Full to extend all reads or GATC to extend reads to --len or to the next GATC site, whichever is shorter. Using this option increases peak resolution (default).">
+            <option value="gatc">To nearest GATC site</option>
+            <option value="full">Full</option>
+        </param>
+        <!-- truevalue is inserted verbatim into the command line, so it must spell the pipeline flag exactly. -->
+        <param argument="--full_data_files" type="boolean" truevalue="--full_data_files" falsevalue="" label="Output full binned ratio files (not only GATC array)"/>
+        <param argument="--len" type="integer" min="50" value="300" label="Length to extend reads to"/>
+        <param argument="--bins" type="integer" min="10" value="75" label="Width of bins to use for mapping reads"/>
+        <param argument="--min_norm_value" type="float" value="-5.0" label="Minimum log2 value to limit normalisation search at"/>
+        <param argument="--max_norm_value" type="float" value="5.0" label="Maximum log2 value to limit normalisation search at"/>
+        <param argument="--method_subtract" type="boolean" truevalue="--method_subtract" falsevalue="" label="Subtract DAM control values from DAM-fusion values instead of using the log2 ratio?"/>
+        <param argument="--norm_method" type="select" label="Select normalization method">
+            <option value="kde">kernel density estimation of log2 GATC fragment ratio (recommended)</option>
+            <option value="rpm">readcounts per million reads (not recommended for most use cases)</option>
+        </param>
+        <param argument="--norm_steps" type="integer" min="1" value="300" label="Number of points in normalisation routine"/>
+        <param argument="--q" type="integer" value="30" min="0" label="Cutoff average Q score for aligned reads"/>
+        <param argument="--qscore1min" type="float" min="0.0" value="0.4" max="1.0" label="min decile for normalising from Dam array"/>
+        <param argument="--qscore1max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from Dam array"/>
+        <param argument="--qscore2max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from fusion-protein array"/>
+    </inputs>
+    <outputs>
+        <data name="output_ratio" format="bedgraph" from_work_dir="fusion.output" label="DAM-fusion vs Dam-only ratio">
+            <change_format>
+                <when input="output_format" value="gff" format="gff" />
+            </change_format>
+        </data>
+        <data name="control_output" format="bam" from_work_dir="Dam-ext300.bam" label="DAM-only alignment"/>
+        <data name="fusion_output" format="bam" from_work_dir="Fusion-ext300.bam" label="DAM-fusion alignment"/>
+    </outputs>
+    <tests>
+        <test>
+            <!-- Runs both samples against the bundled dm6 index with RPM normalisation. -->
+            <param name="dam" value="A001.fastq"/>
+            <param name="dam_fusion" value="A002.fastq"/>
+            <param name="gatc_frag_file" value="dm6.GATC.gff"/>
+            <!-- The genome select is named reference_index; "index" is only the configfile name. -->
+            <param name="reference_index" value="dm6"/>
+            <param name="norm_method" value="rpm"/>
+            <output name="output_ratio" file="output_ratio.bedgraph"/>
+            <output name="control_output" file="control.bam"/>
+            <output name="fusion_output" file="fusion.bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Processing DamID-seq data involves extending single-end reads, aligning
+the reads to the genome and determining the coverage, similar to
+processing regular ChIP-seq datasets. However, as DamID data is
+represented as a log2 ratio of (Dam-fusion/Dam), normalisation of the
+sample and Dam-only control is necessary and adding pseudocounts to
+mitigate the effect of background counts is highly recommended.
+
+damidseq_pipeline is a single script that automatically handles
+sequence alignment, read extension, binned counts, normalisation,
+pseudocount addition and final ratio file generation. The script uses
+FASTQ or BAM files as input, and outputs the final log2 ratio files in
+bedGraph (or optionally GFF) format.
+
+The output ratio files can easily be converted to TDF for viewing in IGV using
+igvtools. The files can be processed for peak calling using find_peaks or, if
+using RNA pol II DamID, transcribed genes can be determined using
+polii.gene.call.
+
+        ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv386</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A001.fastq	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,4 @@
+@SN1078:205:3:1101:15342:2122#CTTGTAA
+GATGGTGACGTCCGTGTCCTGGACAATGACGACCGACGAG
++SN1078:205:3:1101:15342:2122#CTTGTAA
+BBBABADDHHHHHJHIIIJJJJJJJJJJJJIJJJJGJIHI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A002.fastq	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,4 @@
+@SN1078:205:3:1101:15342:2122#CTTGTAA
+GATGGTGACGTCCGTGTCCTGGACAATGACGACCGACGAG
++SN1078:205:3:1101:15342:2122#CTTGTAA
+BBBABADDHHHHHJHIIIJJJJJJJJJJJJIJJJJGJIHI
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bowtie2_indices.loc	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,1 @@
+dm6	dm6	dm6	${__HERE__}/bt2/dm6.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bt2/.GATC.gff	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,3 @@
+X	.	.	157	161	1	+	.	.
+X	.	.	364	368	1	+	.	.
+X	.	.	499	503	1	+	.	.
Binary file test-data/bt2/dm6.fa.1.bt2 has changed
Binary file test-data/bt2/dm6.fa.2.bt2 has changed
Binary file test-data/bt2/dm6.fa.3.bt2 has changed
Binary file test-data/bt2/dm6.fa.4.bt2 has changed
Binary file test-data/bt2/dm6.fa.rev.1.bt2 has changed
Binary file test-data/bt2/dm6.fa.rev.2.bt2 has changed
Binary file test-data/control.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dm6.GATC.gff	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,3 @@
+X	.	.	157	161	1	+	.	.
+X	.	.	364	368	1	+	.	.
+X	.	.	499	503	1	+	.	.
Binary file test-data/fusion.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_ratio.bedgraph	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,3 @@
+track type=bedGraph name="Fusion-vs-Dam" description="Fusion DamIDseq"
+X	159	366	0
+X	366	501	0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie2_indices.loc.sample	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,37 @@
+# bowtie2_indices.loc.sample
+# This is a *.loc.sample file distributed with Galaxy that enables tools
+# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2.
+# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup
+# First create these data files and save them in your own data directory structure.
+# Then, create a bowtie2_indices.loc file to use those indexes with tools.
+# Copy this file, save it with the same name (minus the .sample),
+# follow the format examples, and store the result in this directory.
+# The file should include a one-line entry for each index set.
+# The path points to the "basename" for the set, not a specific file.
+# It has four text columns separated by TABS.
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+# So, for example, if you had hg19 indexes stored in:
+#
+#    /depot/data2/galaxy/hg19/bowtie2/
+#
+# containing hg19 genome and hg19.*.bt2 files, such as:
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.fa
+#    -rw-rw-r-- 1 james   james   914M Feb 10 18:56 hg19canon.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 18:56 hg19canon.2.bt2
+#    -rw-rw-r-- 1 james   james   3.3K Feb 10 16:54 hg19canon.3.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 16:54 hg19canon.4.bt2
+#    -rw-rw-r-- 1 james   james   914M Feb 10 20:45 hg19canon.rev.1.bt2
+#    -rw-rw-r-- 1 james   james   683M Feb 10 20:45 hg19canon.rev.2.bt2
+#
+# then the bowtie2_indices.loc entry could look like this:
+#
+#hg19	hg19	Human (hg19)	/depot/data2/galaxy/hg19/bowtie2/hg19canon
+#
+#More examples:
+#
+#mm10	mm10	Mouse (mm10)	/depot/data2/galaxy/mm10/bowtie2/mm10
+#dm3	dm3	D. melanogaster (dm3)	/depot/data2/galaxy/dm3/bowtie2/dm3
+#
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of indexes in the Bowtie2 mapper format -->
+    <table name="bowtie2_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/bowtie2_indices.loc" />
+    </table>
+</tables>