Mercurial > repos > mvdbeek > damidseq_core
comparison damidseq_core.xml @ 0:eb3a145c4962 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_core commit b'33637968e1e32c02d7765a6701e930a0ea0dd903\n'
author | mvdbeek |
---|---|
date | Wed, 22 Mar 2017 09:56:09 -0400 |
parents | |
children | 0d1514ecd757 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:eb3a145c4962 |
---|---|
1 <tool id="damidseq_core" name="damidseq" version="0.1.0"> | |
<!-- One-line summary of what the tool does. -->
2 <description>align, extend and normalize a DAMID-seq experiment</description> | |
<!-- Runtime dependency: the damidseq_pipeline package, pinned to version 1.4. -->
3 <requirements> | |
4 <requirement type="package" version="1.4">damidseq_pipeline</requirement> | |
5 </requirements> | |
<!-- Version probe: prints the help text (stderr merged into stdout) and keeps only the lines that mention damidseq_pipeline. -->
6 <version_command><![CDATA[damidseq_pipeline --help 2>&1| grep damidseq_pipeline]]></version_command> | |
<!--
  Job command line (Cheetah template rendered to shell).
  1. Point HOME at the job working directory.
  2. Symlink the two read datasets as A001.<ext> / A002.<ext> and the generated
     sample sheet as index.txt, so that damidseq_pipeline can pick them up from
     the working directory.
  3. Run damidseq_pipeline with the options chosen in the form; the thread count
     comes from GALAXY_SLOTS, falling back to 4.
  4. Rename the ratio file to fusion.output, the fixed name the outputs section collects.
  5. The outputs section collects the BAM files as Dam-ext300.bam and
     Fusion-ext300.bam, which only matches the default "len" of 300 with read
     extension enabled. When a file with the expected name is absent, the BAM that
     was produced is renamed to it, so the outputs are also collected for other
     settings. NOTE(review): this assumes the pipeline leaves exactly one Dam*.bam
     and one Fusion*.bam in the working directory; confirm against the pipeline.
-->
7 <command detect_errors="aggressive"><![CDATA[ | |
8 export HOME="\$PWD" && | |
9 ln -f -s '$dam' A001.$dam.ext && | |
10 ln -f -s '$dam_fusion' A002.$dam_fusion.ext && | |
11 ln -f -s '$index' index.txt && | |
12 damidseq_pipeline | |
13 --bins=$bins | |
14 --bowtie=1 | |
15 --bowtie2_genome_dir='$reference_index.fields.path' | |
16 --extend_reads=$extend_reads | |
17 --extension_method='$extension_method' | |
18 $full_data_files | |
19 --gatc_frag_file='$gatc_frag_file' | |
20 --len=$len | |
21 --max_norm_value='$max_norm_value' | |
22 $method_subtract | |
23 --min_norm_value='$min_norm_value' | |
24 --norm_method=$norm_method | |
25 --norm_steps=$norm_steps | |
26 --output_format=$output_format | |
27 --q=$q | |
28 --qscore1max=$qscore1max | |
29 --qscore1min=$qscore1min | |
30 --qscore2max=$qscore2max | |
31 --threads=\${GALAXY_SLOTS:-4} && | |
32 mv Fusion-vs-Dam.*.$output_format fusion.output && ( test -e Dam-ext300.bam || mv Dam*.bam Dam-ext300.bam ) && ( test -e Fusion-ext300.bam || mv Fusion*.bam Fusion-ext300.bam ) | |
33 ]]></command> | |
<!--
  Generated two-line sample sheet; the command section symlinks it into the
  working directory as index.txt. It pairs the keys A1 and A2 with the sample
  names Dam and Fusion.
  NOTE(review): presumably A1/A2 are matched against the A001/A002 symlink names
  created by the command; confirm against damidseq_pipeline's index handling.
-->
34 <configfiles> | |
35 <configfile name="index">A1 Dam | |
36 A2 Fusion</configfile> | |
37 </configfiles> | |
<!--
  Tool form.
  - dam / dam_fusion: the two read datasets (FASTQ, optionally gzipped) for the
    Dam-only control and the Dam-fusion sample. "dam" is declared with name=
    rather than argument= because the command never passes a "dam" option to the
    pipeline; the parameter name is unchanged.
  - reference_index: bowtie2 index chosen from the bowtie2_indexes data table.
  - The remaining parameters map one-to-one onto damidseq_pipeline options of the
    same name. Boolean switches render their truevalue verbatim on the command
    line, so truevalue must be the exact option spelling; full_data_files
    previously rendered a misspelled option without the trailing "s".
-->
38 <inputs> | |
39 <param name="dam" type="data" format="fastq,fastq.gz" label="Control Dam-only reads (FASTQ)"/> | |
40 <param name="dam_fusion" type="data" format="fastq,fastq.gz" label="Dam-fusion reads (FASTQ)"/> | |
41 <param name="reference_index" type="select" label="Select reference genome" help="If your genome of interest is not listed, contact the Galaxy team"> | |
42 <options from_data_table="bowtie2_indexes"> | |
43 <filter type="sort_by" column="2"/> | |
44 <validator type="no_options" message="No indexes are available for the selected input dataset"/> | |
45 </options> | |
46 </param> | |
47 <param argument="--gatc_frag_file" type="data" format="gff" label="GFF file with all GATC locations"/> | |
48 <param name="output_format" type="select" label="Select the output format for the peaks"> | |
49 <option value="bedgraph">Bedgraph</option> | |
50 <option value="gff">GFF</option> | |
51 </param> | |
52 <param argument="--extend_reads" type="boolean" truevalue="1" falsevalue="0" checked="True" label="Perform read extension?"/> | |
53 <param argument="--extension_method" type="select" label="Select the read extension method" help="'To nearest GATC site' (default) extends reads to --len or to the next GATC site, whichever is shorter; this increases peak resolution. 'Full' extends all reads to --len."> | |
54 <option value="gatc">To nearest GATC site</option> | |
55 <option value="full">Full</option> | |
56 </param> | |
57 <param argument="--full_data_files" type="boolean" truevalue="--full_data_files" falsevalue="" label="Output full binned ratio files (not only GATC array)"/> | |
58 <param argument="--len" type="integer" min="50" value="300" label="Length to extend reads to"/> | |
59 <param argument="--bins" type="integer" min="10" value="75" label="Width of bins to use for mapping reads"/> | |
60 <param argument="--min_norm_value" type="float" value="-5.0" label="Minimum log2 value to limit normalisation search at"/> | |
61 <param argument="--max_norm_value" type="float" value="5.0" label="Maximum log2 value to limit normalisation search at"/> | |
62 <param argument="--method_subtract" type="boolean" truevalue="--method_subtract" falsevalue="" label="Subtract DAM control values from DAM-fusion values instead of using the log2 ratio?"/> | |
63 <param argument="--norm_method" type="select" label="Select normalization method"> | |
64 <option value="kde">kernel density estimation of log2 GATC fragment ratio (recommended)</option> | |
65 <option value="rpm">readcounts per million reads (not recommended for most use cases)</option> | |
66 </param> | |
67 <param argument="--norm_steps" type="integer" min="1" value="300" label="Number of points in normalisation routine"/> | |
68 <param argument="--q" type="integer" value="30" min="0" label="Cutoff average Q score for aligned reads"/> | |
69 <param argument="--qscore1min" type="float" min="0.0" value="0.4" max="1.0" label="min decile for normalising from Dam array"/> | |
70 <param argument="--qscore1max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from Dam array"/> | |
71 <param argument="--qscore2max" type="float" min="0.0" value="1.0" max="1.0" label="max decile for normalising from fusion-protein array"/> | |
72 </inputs> | |
<!--
  Datasets collected from the job working directory by fixed file name.
  - output_ratio: the ratio file, read from fusion.output; declared as bedgraph and
    switched to gff when the output_format parameter is "gff".
  - control_output / fusion_output: the BAM files for the Dam-only control and the
    Dam-fusion sample.
  NOTE(review): the "ext300" in the BAM file names matches the default value (300)
  of the "len" parameter; the job must leave files with exactly these names in the
  working directory for the outputs to be collected.
-->
73 <outputs> | |
74 <data name="output_ratio" format="bedgraph" from_work_dir="fusion.output" label="DAM-fusion vs Dam-only ratio"> | |
75 <change_format> | |
76 <when input="output_format" value="gff" format="gff" /> | |
77 </change_format> | |
78 </data> | |
79 <data name="control_output" format="bam" from_work_dir="Dam-ext300.bam" label="DAM-only alignment"/> | |
80 <data name="fusion_output" format="bam" from_work_dir="Fusion-ext300.bam" label="DAM-fusion alignment"/> | |
81 </outputs> | |
<!--
  Functional test: runs the tool on two small FASTQ files against the dm6 index
  with rpm normalisation and compares the ratio file and both BAM outputs with the
  stored expected files. The genome is selected through the "reference_index"
  select parameter; the tool has no input named "index" (that name belongs to the
  generated config file), so the test must not set one.
-->
82 <tests> | |
83 <test> | |
84 <param name="dam" value="A001.fastq"/> | |
85 <param name="dam_fusion" value="A002.fastq"/> | |
86 <param name="gatc_frag_file" value="dm6.GATC.gff"/> | |
87 <param name="reference_index" value="dm6"/> | |
88 <param name="norm_method" value="rpm"/> | |
89 <output name="output_ratio" file="output_ratio.bedgraph"/> | |
90 <output name="control_output" file="control.bam"/> | |
91 <output name="fusion_output" file="fusion.bam"/> | |
92 </test> | |
93 </tests> | |
94 <help><![CDATA[ | |
95 | |
96 Processing DamID-seq data involves extending single-end reads, aligning | |
97 the reads to the genome and determining the coverage, similar to | |
98 processing regular ChIP-seq datasets. However, as DamID data is | |
99 represented as a log2 ratio of (Dam-fusion/Dam), normalisation of the | |
100 sample and Dam-only control is necessary and adding pseudocounts to | |
101 mitigate the effect of background counts is highly recommended. | |
102 | |
103 damidseq_pipeline is a single script that automatically handles | |
104 sequence alignment, read extension, binned counts, normalisation, | |
105 pseudocount addition and final ratio file generation. The script itself accepts | |
106 FASTQ or BAM files as input (this Galaxy tool takes FASTQ reads), and outputs the final log2 ratio files in | |
107 bedGraph (or optionally GFF) format. | |
108 | |
109 The output ratio files can easily be converted to TDF for viewing in IGV using | |
110 igvtools. The files can be processed for peak calling using find_peaks or, if | |
111 using RNA pol II DamID, transcribed genes can be determined using | |
112 polii.gene.call. | |
113 | |
114 ]]></help> | |
<!-- Publication to cite when using this tool, referenced by DOI. -->
115 <citations> | |
116 <citation type="doi">10.1093/bioinformatics/btv386</citation> | |
117 </citations> | |
118 </tool> |