comparison chipseeker.xml @ 1:95f779f4adb7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chipseeker commit 3419a5a5e19a93369c8c20a39babe5636a309292
author rnateam
date Tue, 29 May 2018 15:08:04 -0400
parents 58ef4507ce5a
children cb133602cd9b
comparison
equal deleted inserted replaced
0:58ef4507ce5a 1:95f779f4adb7
1 <tool id="chipseeker" name="ChIPseeker" version="1.14.2"> 1 <tool id="chipseeker" name="ChIPseeker" version="1.14.2.1">
2 <description>for ChIP peak annotation and visualization</description> 2 <description>for ChIP peak annotation and visualization</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="1.14.2">bioconductor-chipseeker</requirement> 4 <requirement type="package" version="1.14.2">bioconductor-chipseeker</requirement>
5 <requirement type="package" version="3.4.0">bioconductor-txdb.hsapiens.ucsc.hg38.knowngene</requirement> 5 <requirement type="package" version="1.4.4">r-optparse</requirement>
6 <requirement type="package" version="3.2.2">bioconductor-txdb.hsapiens.ucsc.hg19.knowngene</requirement>
7 <requirement type="package" version="3.4.0">bioconductor-txdb.Mmusculus.UCSC.mm10.knownGene</requirement>
8 <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement>
9 <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement>
10 </requirements> 6 </requirements>
11 <version_command><![CDATA[ 7 <version_command><![CDATA[
12 echo $(R --version | grep version | grep -v GNU)", ChIPseeker version" $(R --vanilla --slave -e "library(ChIPseeker); cat(sessionInfo()\$otherPkgs\$ChIPseeker\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Hsapiens.UCSC.hg38.knownGene version" $(R --vanilla --slave -e "library(TxDb.Hsapiens.UCSC.hg38.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Hsapiens.UCSC.hg38.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Hsapiens.UCSC.hg19.knownGene version" $(R --vanilla --slave -e "library(TxDb.Hsapiens.UCSC.hg19.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Hsapiens.UCSC.hg19.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Mmusculus.UCSC.mm10.knownGene version" $(R --vanilla --slave -e "library(TxDb.Mmusculus.UCSC.mm10.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Mmusculus.UCSC.mm10.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ") 8 echo $(R --version | grep version | grep -v GNU)", ChIPseeker version" $(R --vanilla --slave -e "library(ChIPseeker); cat(sessionInfo()\$otherPkgs\$ChIPseeker\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
13 ]]></version_command> 9 ]]></version_command>
14 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
11 #set gtf = "refgtf"
12 #if $gtf_source.gtf_source_select == "history":
13 ln -s '${gtf_source.gtf_hist}' $gtf &&
14 #else if $gtf_source.gtf_source_select == "cached":
15 ln -s '${gtf_source.gtf_builtin.fields.path}' $gtf &&
16 #end if
17
15 #if $rscript: 18 #if $rscript:
16 cp '${chipseeker_script}' '${out_rscript}' && 19 cp '$__tool_directory__/chipseeker.R' '$out_rscript' &&
17 #end if 20 #end if
18 Rscript '${chipseeker_script}' 21
22 Rscript '$__tool_directory__/chipseeker.R'
23
24 -i '$peaks'
25 -G '$gtf'
26 -u $upstream
27 -d $downstream
28 #if $flankgeneinfo:
29 -F $flankgeneinfo
30 -D $flankgenedist
31 #end if
32 -f $format
33 -p $pdf
19 ]]> 34 ]]>
20 </command> 35 </command>
21 <configfiles>
22 <configfile name="chipseeker_script"><![CDATA[
23 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
24
25 # We need this so that Galaxy does not crash with a UTF-8 error under German locale (LC) settings.
26 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
27
28 suppressPackageStartupMessages(library(ChIPseeker))
29
30 genome <- "${genome}"
31
32 if (genome == "hg38") {
33 suppressPackageStartupMessages({
34 library(TxDb.Hsapiens.UCSC.hg38.knownGene)
35 library(org.Hs.eg.db)
36 })
37 txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
38 annodb <- "org.Hs.eg.db"
39 } else if (genome == "hg19") {
40 suppressPackageStartupMessages({
41 library(TxDb.Hsapiens.UCSC.hg19.knownGene)
42 library(org.Hs.eg.db)
43 })
44 txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
45 annodb <- "org.Hs.eg.db"
46 } else if (genome == "mm10") {
47 suppressPackageStartupMessages({
48 library(TxDb.Mmusculus.UCSC.mm10.knownGene)
49 library(org.Mm.eg.db)
50 })
51 txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
52 annodb <- "org.Mm.eg.db"
53 } else {
54 cat(paste("Genome not supported", genome))
55 }
56
57 peaks <- readPeakFile('$peaks_file')
58 peakAnno <- annotatePeak(peaks, TxDb=txdb, annoDb=annodb)
59 write.table(peakAnno, file='$out_tab', sep="\t", row.names=FALSE, quote=FALSE)
60
61 if (!is.null("${pdf}")) {
62 pdf("out.pdf", width=14)
63 plotAnnoPie(peakAnno)
64 plotAnnoBar(peakAnno)
65 vennpie(peakAnno)
66 upsetplot(peakAnno)
67 plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS")
68 dev.off()
69 }
70 ]]></configfile>
71 </configfiles>
72
73 <inputs> 36 <inputs>
74 <param name="peaks_file" type="data" format="bed" label="Peaks file" help="A peaks file in BED format." /> 37 <param name="peaks" type="data" format="bed,interval" label="Peaks file" help="A peaks file in BED format." />
75 <param name="genome" type="select" label="Genome" help="Select the genome. Options are hg38, hg19 or mm10."> 38 <conditional name="gtf_source">
76 <option value="hg38">hg38</option> 39 <param name="gtf_source_select" type="select" label="Annotation source" help="Select a GTF to use for annotation source.">
77 <option value="hg19">hg19</option> 40 <option value="cached" selected="true">Use a built-in GTF</option>
78 <option value="mm10">mm10</option> 41 <option value="history">Use a GTF from history</option>
42 </param>
43 <when value="cached">
44 <param name="gtf_builtin" type="select" label="Select a built-in GTF" help="If the GTF file for your transcriptome of interest is not listed, contact your Galaxy administrator">
45 <options from_data_table="gene_sets">
46 <filter type="sort_by" column="2" />
47 <validator type="no_options" message="No GTF file is available." />
48 </options>
49 </param>
50 </when>
51 <when value="history">
52 <param name="gtf_hist" type="data" format="gtf" label="Select a history GTF" />
53 </when>
54 </conditional>
55 <param name="upstream" type="integer" min="0" value="3000" label="TSS upstream region" help="User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb." />
56 <param name="downstream" type="integer" min="0" value="3000" label="TSS downstream region" help="User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb."/>
57 <param name="flankgeneinfo" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Add flanking gene information?" help="If specified all genes within the flanking gene distance are reported for each peak. Default: No."/>
58 <param name="flankgenedist" type="integer" min="0" value="5000" label="Flanking gene distance" help="If flanking gene info is turned on the flanking distance can be specified. Default: 5000."/>
59 <param name="format" type="select" label="Output Format">
60 <option value="interval" selected="True">Interval</option>
61 <option value="tabular">Tabular (tab-separated)</option>
79 </param> 62 </param>
80 63 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="True" label="Output PDF of plots?" help="Default: Yes" />
81 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="True" label="Output a PDF file of plots?" help="Default: Yes" />
82 <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" /> 64 <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" />
83 </inputs> 65 </inputs>
84 66
85 <outputs> 67 <outputs>
86 <data name="out_tab" format="tabular" from_work_dir="out.tab" label="${tool.name} on ${on_string}: Annotated Peaks" /> 68 <data name="out_tab" format="interval" from_work_dir="out.tab" label="${tool.name} on ${on_string}: Annotated Peaks" >
69 <change_format>
70 <when input="format" value="tabular" format="tabular" />
71 </change_format>
72 </data>
87 <data name="out_plots" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots"> 73 <data name="out_plots" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots">
88 <filter>pdf</filter> 74 <filter>pdf</filter>
89 </data> 75 </data>
90 <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript"> 76 <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript">
91 <filter>rscript</filter> 77 <filter>rscript</filter>
92 </data> 78 </data>
93 </outputs> 79 </outputs>
94 80
95 <tests> 81 <tests>
96 <!-- Ensure outputs work --> 82 <!-- Ensure bed and GTF inputs and all outputs work -->
97 <test expect_num_outputs="3"> 83 <test expect_num_outputs="3">
98 <param name="peaks_file" value="in.diffbind" ftype="bed"/> 84 <param name="peaks" value="in.bed" ftype="bed"/>
99 <param name="genome" value="hg19"/> 85 <param name="gtf_source_select" value="history"/>
86 <param name="gtf_hist" value="in.gtf"/>
100 <param name="rscript" value="True"/> 87 <param name="rscript" value="True"/>
101 <output name="out_tab" file="out.tab" /> 88 <output name="out_tab" ftype="interval" file="out.int" />
102 <output name="out_plots" file="out.pdf" compare="sim_size"/> 89 <output name="out_plots" file="out.pdf" compare="sim_size"/>
103 <output name="out_rscript" > 90 <output name="out_rscript" >
104 <assert_contents> 91 <assert_contents>
105 <has_text_matching expression="peakAnno.*annotatePeak" /> 92 <has_text_matching expression="peakAnno.*annotatePeak" />
106 </assert_contents> 93 </assert_contents>
107 </output> 94 </output>
108 </test> 95 </test>
96 <!-- Ensure built-in GTF works -->
97 <test expect_num_outputs="2">
98 <param name="peaks" value="in.interval" ftype="interval"/>
99 <param name="gtf_source_select" value="cached"/>
100 <output name="out_tab" ftype="interval" file="outint.int" />
101 <output name="out_plots" file="out.pdf" compare="sim_size"/>
102 </test>
103 <!-- Ensure tabular output works -->
104 <test expect_num_outputs="2">
105 <param name="peaks" value="in.interval" ftype="interval"/>
106 <param name="gtf_source_select" value="history"/>
107 <param name="gtf_hist" value="in.gtf"/>
108 <param name="format" value="tabular"/>
109 <output name="out_tab" ftype="tabular" file="outint.tab" />
110 <output name="out_plots" file="out.pdf" compare="sim_size"/>
111 </test>
112 <!-- Ensure TSS region specification works -->
113 <test expect_num_outputs="2">
114 <param name="peaks" value="in.interval" ftype="interval"/>
115 <param name="gtf_source_select" value="history"/>
116 <param name="gtf_hist" value="in.gtf"/>
117 <param name="upstream" value="1000" />
118 <param name="downstream" value="1000" />
119 <param name="format" value="tabular"/>
120 <output name="out_tab" ftype="tabular" file="outtss.tab" />
121 <output name="out_plots" file="out.pdf" compare="sim_size"/>
122 </test>
123 <!-- Ensure flanking genes works -->
124 <test expect_num_outputs="2">
125 <param name="peaks" value="in.interval" ftype="interval"/>
126 <param name="gtf_source_select" value="history"/>
127 <param name="gtf_hist" value="in.gtf"/>
128 <param name="flankgeneinfo" value="True" />
129 <param name="format" value="tabular"/>
130 <output name="out_tab" ftype="tabular" file="outflank.tab" />
131 <output name="out_plots" file="out.pdf" compare="sim_size"/>
132 </test>
109 </tests> 133 </tests>
110 <help><![CDATA[ 134 <help><![CDATA[
111 135
112 .. class:: infomark 136 .. class:: infomark
113 137
118 142
119 ----- 143 -----
120 144
121 **Inputs** 145 **Inputs**
122 146
123 A peaks file in BED format, e.g. from MACS2 or DiffBind. 147 A peaks file in BED or Interval format, e.g. from MACS2 or DiffBind.
148
149 Example:
150
151 ===== ====== ====== ======== ===== ======
152 Chrom Start End Name Score Strand
153 ===== ====== ====== ======== ===== ======
154 18 394599 396513 DiffBind 0 .
155 18 111566 112005 DiffBind 0 .
156 18 346463 347342 DiffBind 0 .
157 18 399013 400382 DiffBind 0 .
158 18 371109 372102 DiffBind 0 .
159 ===== ====== ====== ======== ===== ======
160
161 A GTF file for annotation.
124 162
125 ----- 163 -----
126 164
127 **Outputs** 165 **Outputs**
128 166
129 This tool outputs 167 This tool outputs
130 168
131 * a table of annotated peaks 169 * a file of annotated peaks in Interval or Tabular format
132 * a PDF of plots 170 * a PDF of plots
133 * the R script used by this tool 171 * the R script used by this tool
172
173 **Annotated peaks**
174
175 Annotation similar to that shown below will be added to the input file.
176
177 Example - **Interval format**:
178
179 ===== ====== ====== =====================================================================================================================================================
180 Chrom Start End Comment
181 ===== ====== ====== =====================================================================================================================================================
182 18 394599 396513 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 3869
183 18 111566 112005 DiffBind|0|.|Promoter (<=1kb)|1|111568|112005| 438|1|ENSG00000263006|ENST00000608049| 0
184 18 346463 347342 DiffBind|0|.|Exon (ENST00000400256/ENSG00000158270, exon 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|53040
185 18 399013 400382 DiffBind|0|.|Promoter (<=1kb)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 0
186 18 371109 372102 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|28280
187 ===== ====== ====== =====================================================================================================================================================
188
189 Columns contain the following data:
190
191 * **Chrom**: Chromosome name
192 * **Start**: Start position of site
193 * **End**: End position of site
194 * **Comment**: The pipe-separated ("|") values in this column correspond to:
195
196 * *<Any additional input columns>*
197 * *annotation* (Promoter, 5’ UTR, 3’ UTR, Exon, Intron, Downstream, Intergenic)
198 * *geneChr*
199 * *geneStart*
200 * *geneEnd*
201 * *geneLength*
202 * *geneStrand*
203 * *geneId*
204 * *transcriptId*
205 * *distanceToTSS*
206
207 Example - **Tabular format**:
208
209 ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== =============
210 Chrom Start End Name Score Strand Comment annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS
211 ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== =============
212 18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 3869
213 18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 Promoter (<=1kb) 1 111568 112005 438 1 ENSG00000263006 ENST00000608049 0
214 18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 Exon (ENST00000400256/ENSG00000158270, exon 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 53040
215 18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 Promoter (<=1kb) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 0
216 18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 28280
217 ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== =============
134 218
135 .. _ChIPseeker: https://bioconductor.org/packages/release/bioc/html/ChIPseeker.html 219 .. _ChIPseeker: https://bioconductor.org/packages/release/bioc/html/ChIPseeker.html
136 .. _`ChIPseeker vignette`: http://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html 220 .. _`ChIPseeker vignette`: http://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html
137 221
138 ]]></help> 222 ]]></help>