Mercurial > repos > mvdbeek > damidseq_core
changeset 0:eb3a145c4962 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_core commit b'33637968e1e32c02d7765a6701e930a0ea0dd903\n'
author | mvdbeek |
---|---|
date | Wed, 22 Mar 2017 09:56:09 -0400 |
parents | |
children | 0d1514ecd757 |
files | damidseq_core.xml test-data/A001.fastq test-data/A002.fastq test-data/bowtie2_indices.loc test-data/bt2/.GATC.gff test-data/bt2/dm6.fa.1.bt2 test-data/bt2/dm6.fa.2.bt2 test-data/bt2/dm6.fa.3.bt2 test-data/bt2/dm6.fa.4.bt2 test-data/bt2/dm6.fa.rev.1.bt2 test-data/bt2/dm6.fa.rev.2.bt2 test-data/control.bam test-data/dm6.GATC.gff test-data/fusion.bam test-data/output_ratio.bedgraph tool-data/bowtie2_indices.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 18 files changed, 187 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/damidseq_core.xml Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,127 @@
+<tool id="damidseq_core" name="damidseq" version="0.1.0">
+    <description>align, extend and normalize a DAMID-seq experiment</description>
+    <requirements>
+        <requirement type="package" version="1.4">damidseq_pipeline</requirement>
+    </requirements>
+    <version_command><![CDATA[damidseq_pipeline --help 2>&1| grep damidseq_pipeline]]></version_command>
+    <!-- Links both FASTQ inputs under fixed A001/A002 names, links the "index" configfile
+         as index.txt, runs damidseq_pipeline, then renames the ratio file to fusion.output
+         so it can be collected independently of the chosen output format. -->
+    <command detect_errors="aggressive"><![CDATA[
+        export HOME="\$PWD" &&
+        ln -f -s '$dam' A001.$dam.ext &&
+        ln -f -s '$dam_fusion' A002.$dam_fusion.ext &&
+        ln -f -s '$index' index.txt &&
+        damidseq_pipeline
+            --bins=$bins
+            --bowtie=1
+            --bowtie2_genome_dir='$reference_index.fields.path'
+            --extend_reads=$extend_reads
+            --extension_method='$extension_method'
+            $full_data_files
+            --gatc_frag_file='$gatc_frag_file'
+            --len=$len
+            --max_norm_value='$max_norm_value'
+            $method_subtract
+            --min_norm_value='$min_norm_value'
+            --norm_method=$norm_method
+            --norm_steps=$norm_steps
+            --output_format=$output_format
+            --q=$q
+            --qscore1max=$qscore1max
+            --qscore1min=$qscore1min
+            --qscore2max=$qscore2max
+            --threads=\${GALAXY_SLOTS:-4} &&
+        mv Fusion-vs-Dam.*.$output_format fusion.output
+    ]]></command>
+    <!-- Sample sheet linked into the working directory as index.txt by the command above. -->
+    <configfiles>
+        <configfile name="index">A1 Dam
+A2 Fusion</configfile>
+    </configfiles>
+    <inputs>
+        <param argument="--dam" type="data" format="fastq,fastq.gz" label="Control DAM reads file"/>
+        <param name="dam_fusion" type="data" format="fastq,fastq.gz" label="DAM fusion reads file"/>
+        <param name="reference_index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
+            <options from_data_table="bowtie2_indexes">
+                <filter type="sort_by" column="2"/>
+                <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+            </options>
+        </param>
+        <param argument="--gatc_frag_file" type="data" format="gff" label="GFF file with all GATC locations"/>
+        <param name="output_format" type="select" label="Select the output format for the peaks">
+            <option value="bedgraph">Bedgraph</option>
+            <option value="gff">GFF</option>
+        </param>
+        <param argument="--extend_reads" type="boolean" truevalue="1" falsevalue="0" checked="True" label="Perform read extension?"/>
+        <param argument="--extension_method" type="select" label="Select the read extension method" help="Select Full to extend all reads or GATC to extend reads to --len or to the next GATC site, whichever is shorter. Using this option increases peak resolution (default).">
+            <option value="gatc">To nearest GATC site</option>
+            <option value="full">Full</option>
+        </param>
+        <!-- The truevalue is passed verbatim on the command line, so it must spell the flag exactly like the argument. -->
+        <param argument="--full_data_files" type="boolean" truevalue="--full_data_files" falsevalue="" label="Output full binned ratio files (not only GATC array)"/>
+        <param argument="--len" type="integer" min="50" value="300" label="Length to extend reads to"/>
+        <param argument="--bins" type="integer" min="10" value="75" label="Width of bins to use for mapping reads"/>
+        <param argument="--min_norm_value" type="float" value="-5.0" label="Minimum log2 value to limit normalisation search at"/>
+        <param argument="--max_norm_value" type="float" value="5.0" label="Maximum log2 value to limit normalisation search at"/>
+        <param argument="--method_subtract" type="boolean" truevalue="--method_subtract" falsevalue="" label="Subtract DAM control values from DAM-fusion values instead of using the log2 ratio?"/>
+        <param argument="--norm_method" type="select" label="Select normalization method">
+            <option value="kde">kernel density estimation of log2 GATC fragment ratio (recommended)</option>
+            <option value="rpm">readcounts per million reads (not recommended for most use cases)</option>
+        </param>
+        <param argument="--norm_steps" type="integer" min="1" value="300" label="Number of points in normalisation routine"/>
+        <param argument="--q" type="integer" value="30" min="0" label="Cutoff average Q score for aligned reads"/>
+        <param argument="--qscore1min" type="float" min="0.0" value="0.4" max="1.0" label="min decile for normalising from Dam array"/>
+        <param argument="--qscore1max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from Dam array"/>
+        <param argument="--qscore2max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from fusion-protein array"/>
+    </inputs>
+    <outputs>
+        <data name="output_ratio" format="bedgraph" from_work_dir="fusion.output" label="DAM-fusion vs Dam-only ratio">
+            <change_format>
+                <when input="output_format" value="gff" format="gff" />
+            </change_format>
+        </data>
+        <!-- NOTE(review): the two BAM names below hard-code "ext300", which matches only the default
+             len (300); confirm how damidseq_pipeline names these files for other len values or when
+             read extension is disabled. -->
+        <data name="control_output" format="bam" from_work_dir="Dam-ext300.bam" label="DAM-only alignment"/>
+        <data name="fusion_output" format="bam" from_work_dir="Fusion-ext300.bam" label="DAM-fusion alignment"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dam" value="A001.fastq"/>
+            <param name="dam_fusion" value="A002.fastq"/>
+            <param name="gatc_frag_file" value="dm6.GATC.gff"/>
+            <!-- Selects the dm6 entry of the bowtie2_indexes test data table. -->
+            <param name="reference_index" value="dm6"/>
+            <param name="norm_method" value="rpm"/>
+            <output name="output_ratio" file="output_ratio.bedgraph"/>
+            <output name="control_output" file="control.bam"/>
+            <output name="fusion_output" file="fusion.bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Processing DamID-seq data involves extending single-end reads, aligning
+the reads to the genome and determining the coverage, similar to
+processing regular ChIP-seq datasets. However, as DamID data is
+represented as a log2 ratio of (Dam-fusion/Dam), normalisation of the
+sample and Dam-only control is necessary and adding pseudocounts to
+mitigate the effect of background counts is highly recommended.
+
+damidseq_pipeline is a single script that automatically handles
+sequence alignment, read extension, binned counts, normalisation,
+pseudocount addition and final ratio file generation. The script uses
+FASTQ or BAM files as input, and outputs the final log2 ratio files in
+bedGraph (or optionally GFF) format.
+
+The output ratio files can easily be converted to TDF for viewing in IGV using
+igvtools. The files can be processed for peak calling using find_peaks or, if
+using RNA pol II DamID, transcribed genes can be determined using
+polii.gene.call.
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btv386</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/A001.fastq Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,4 @@ +@SN1078:205:3:1101:15342:2122#CTTGTAA +GATGGTGACGTCCGTGTCCTGGACAATGACGACCGACGAG ++SN1078:205:3:1101:15342:2122#CTTGTAA +BBBABADDHHHHHJHIIIJJJJJJJJJJJJIJJJJGJIHI
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/A002.fastq Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,4 @@ +@SN1078:205:3:1101:15342:2122#CTTGTAA +GATGGTGACGTCCGTGTCCTGGACAATGACGACCGACGAG ++SN1078:205:3:1101:15342:2122#CTTGTAA +BBBABADDHHHHHJHIIIJJJJJJJJJJJJIJJJJGJIHI
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bowtie2_indices.loc Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,1 @@ +dm6 dm6 dm6 ${__HERE__}/bt2/dm6.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bt2/.GATC.gff Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,3 @@ +X . . 157 161 1 + . . +X . . 364 368 1 + . . +X . . 499 503 1 + . .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dm6.GATC.gff Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,3 @@ +X . . 157 161 1 + . . +X . . 364 368 1 + . . +X . . 499 503 1 + . .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_ratio.bedgraph Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,3 @@ +track type=bedGraph name="Fusion-vs-Dam" description="Fusion DamIDseq" +X 159 366 0 +X 366 501 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bowtie2_indices.loc.sample Wed Mar 22 09:56:09 2017 -0400 @@ -0,0 +1,37 @@ +# bowtie2_indices.loc.sample +# This is a *.loc.sample file distributed with Galaxy that enables tools +# to use a directory of indexed data files. This one is for Bowtie2 and Tophat2. +# See the wiki: http://wiki.galaxyproject.org/Admin/NGS%20Local%20Setup +# First create these data files and save them in your own data directory structure. +# Then, create a bowtie2_indices.loc file to use those indexes with tools. +# Copy this file, save it with the same name (minus the .sample), +# follow the format examples, and store the result in this directory. +# The file should include a one-line entry for each index set. +# The path points to the "basename" for the set, not a specific file. +# It has four text columns separated by TABS. +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +# So, for example, if you had hg19 indexes stored in: +# +# /depot/data2/galaxy/hg19/bowtie2/ +# +# containing hg19 genome and hg19.*.bt2 files, such as: +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.fa +# -rw-rw-r-- 1 james james 914M Feb 10 18:56 hg19canon.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 18:56 hg19canon.2.bt2 +# -rw-rw-r-- 1 james james 3.3K Feb 10 16:54 hg19canon.3.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 16:54 hg19canon.4.bt2 +# -rw-rw-r-- 1 james james 914M Feb 10 20:45 hg19canon.rev.1.bt2 +# -rw-rw-r-- 1 james james 683M Feb 10 20:45 hg19canon.rev.2.bt2 +# +# then the bowtie2_indices.loc entry could look like this: +# +#hg19 hg19 Human (hg19) /depot/data2/galaxy/hg19/bowtie2/hg19canon +# +#More examples: +# +#mm10 mm10 Mouse (mm10) /depot/data2/galaxy/mm10/bowtie2/mm10 +#dm3 dm3 D. melanogaster (dm3) /depot/data2/galaxy/dm3/bowtie2/dm3 +# +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Bowtie2 index table; its rows come from tool-data/bowtie2_indices.loc -->
+    <table comment_char="#" name="bowtie2_indexes">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/bowtie2_indices.loc"/>
+    </table>
+</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Mar 22 09:56:09 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Bowtie2 index table used by the tests; its rows come from test-data/bowtie2_indices.loc -->
+    <table comment_char="#" name="bowtie2_indexes">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/bowtie2_indices.loc"/>
+    </table>
+</tables>