annotate [APliBio]Nebula tools suite/Nebula/IntersectBed/bedtools_intersectBed.xml @ 3:1c699789d6d3 draft

Uploaded
author alermine
date Wed, 14 Nov 2012 06:02:48 -0500
parents 2ec3ba0e9e70
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
1 <tool id="bedtools_intersectBed" name="IntersectBed" version="v2.10.0">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
2 <description>Report overlaps between two feature files</description>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
3 <command>bedtools intersect
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
4
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
5 $invert $count
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
6
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
7 -f $min_overlap
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
8
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
9 #if str($typeA.file_typeA)=="bam"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
10 -abam $typeA.fileA -b $typeB.fileB > $outbamfile;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
11 #end if
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
12
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
13 #if str($typeA.file_typeA)=="bed"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
14 $report.type_report -a $typeA.fileA -b $typeB.fileB > $outbedfile;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
15 #end if
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
16
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
17 #if str($typeA.file_typeA)=="gff"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
18 $report.type_report -a $typeA.fileA -b $typeB.fileB > $outgfffile;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
19 #end if
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
20
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
21 #if str($typeA.file_typeA)=="vcf"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
22 $report.type_report -a $typeA.fileA -b $typeB.fileB > $outvcffile;
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
23 #end if
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
24
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
25 </command>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
26 <inputs>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
27
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
28 <conditional name="typeA">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
29 <param name="file_typeA" type="select" label="Select the file A type to intersect">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
30 <option value="bed">BED (.bed)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
31 <option value="bam">BAM (.bam)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
32 <option value="vcf">VCF (.vcf)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
33 <option value="gff">GFF (.gff)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
34 </param>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
35 <when value="bam">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
36 <param name="fileA" format="bam" type="data" label="BAM file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
37 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
38 <when value="bed">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
39 <param name="fileA" format="bed" type="data" label="BED file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
40 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
41 <when value="vcf">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
42 <param name="fileA" format="vcf" type="data" label="VCF file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
43 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
44 <when value="gff">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
45 <param name="fileA" format="gff" type="data" label="GFF file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
46 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
47 </conditional>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
48
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
49 <conditional name="typeB">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
50 <param name="file_typeB" type="select" label="Select the file B type to intersect">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
51 <option value="bed">BED (.bed)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
52 <option value="vcf">VCF (.vcf)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
53 <option value="gff">GFF (.gff)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
54 </param>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
55 <when value="bed">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
56 <param name="fileB" format="bed" type="data" label="BED file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
57 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
58 <when value="vcf">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
59 <param name="fileB" format="vcf" type="data" label="VCF file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
60 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
61 <when value="gff">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
62 <param name="fileB" format="gff" type="data" label="GFF file"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
63 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
64 </conditional>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
65
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
66 <conditional name="report">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
67 <param name="type_report" type="select" label="Select the type of report" help="not used if the file A type is BAM">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
68 <option value="">write the base-pair overlap between A and B</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
69 <option value="-wa">write the original entry in A for each overlap (-wa)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
70 <!-- <option value="-wb">-wb : write the original entry in B for each overlap</option> -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
71 <option value="-wo">write the original A and B entries plus the number of base pairs of overlap between the two features (-wo)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
72 <option value="-wao">write the original A and B entries plus the number of base pairs of overlap between the two features; A features without overlap are also reported, with a NULL B feature and overlap = 0 (-wao)</option>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
73 </param>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
74 <when value="">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
75 <!-- do nothing here -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
76 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
77 <when value="-wa">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
78 <!-- do nothing here -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
79 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
80 <!-- <when value="-wb">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
81 </when> -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
82 <when value="-wo">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
83 <!-- do nothing here -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
84 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
85 <when value="-wao">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
86 <!-- do nothing here -->
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
87 </when>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
88 </conditional>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
89
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
90 <param name="invert" label="Only report those entries in A that have _no overlaps_ with B?" type="boolean" truevalue="-v" falsevalue="" checked="no"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
91 <param name="count" label="For each entry in A, report the number of overlaps with B?" type="boolean" truevalue="-c" falsevalue="" checked="no"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
92
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
93 <param name="min_overlap" type="float" size="10" value="0.05" label="Minimum overlap required as a fraction of A" help="5% by default" />
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
94 <param name="file_name" type="text" size="20" value="sample" label="File name (without file extension)"/>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
95 </inputs>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
96
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
97 <outputs>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
98 <data format="bam" name="outbamfile" label="${file_name}.bam">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
99 <filter>typeA['file_typeA']=="bam"</filter>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
100 </data>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
101 <data format="bed" name="outbedfile" label="${file_name}.bed">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
102 <filter>typeA['file_typeA']=="bed"</filter>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
103 </data>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
104 <data format="gff" name="outgfffile" label="${file_name}.gff">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
105 <filter>typeA['file_typeA']=="gff"</filter>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
106 </data>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
107 <data format="vcf" name="outvcffile" label="${file_name}.vcf">
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
108 <filter>typeA['file_typeA']=="vcf"</filter>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
109 </data>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
110 </outputs>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
111
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
112 <help>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
113 **What it does**
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
114
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
115 This tool uses the "intersectBed" function of Bedtools to report overlaps between two feature files.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
116
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
117 Note: When intersecting SNPs, make sure the coordinates conform to the UCSC format.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
118 That is, the start position for each SNP should be SNP position - 1 and the end position should be SNP position. E.g. chr7 10000001 10000002 rs123464
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
119
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
120 * Report the base-pair overlap between sequence alignments and genes.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
121 $ intersectBed -a reads.bed -b genes.bed
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
122 * Report whether each alignment overlaps one or more genes. If not, the alignment is not reported.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
123 $ intersectBed -a reads.bed -b genes.bed -u
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
124 * Report those alignments that overlap NO genes. Like "grep -v"
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
125 $ intersectBed -a reads.bed -b genes.bed -v
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
126 * Report the number of genes that each alignment overlaps.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
127 $ intersectBed -a reads.bed -b genes.bed -c
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
128 * Report the entire, original alignment entry for each overlap with a gene.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
129 $ intersectBed -a reads.bed -b genes.bed -wa
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
130 * Report the entire, original alignment and gene entries for each overlap plus the number of base pairs. Only read features with an overlap are reported.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
131 $ intersectBed -a reads.bed -b genes.bed -wo
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
132 * Report the entire, original alignment and genes entries for each overlap plus the number of base pairs. However, alignment features w/o overlap are also reported with a NULL gene feature and overlap = 0.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
133 $ intersectBed -a reads.bed -b genes.bed -wao
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
134 * Only report an overlap with a repeat if it spans at least 50% of the exon.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
135 $ intersectBed -a exons.bed -b repeatMasker.bed -f 0.50
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
136 * Only report an overlap if it comprises 50% of the structural variant and 50% of the segmental duplication. Thus, it is reciprocally at least a 50% overlap.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
137 $ intersectBed -a SV.bed -b segmentalDups.bed -f 0.50 -r
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
138 * Read BED A from stdin. For example, find genes that overlap LINEs but not SINEs.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
139 $ intersectBed -a genes.bed -b LINES.bed | intersectBed -a stdin -b SINEs.bed -v
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
140 * Retain only single-end BAM alignments that overlap exons.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
141 $ intersectBed -abam reads.bam -b exons.bed > reads.touchingExons.bam
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
142 * Retain only single-end BAM alignments that do not overlap simple sequence repeats.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
143 $ intersectBed -abam reads.bam -b SSRs.bed -v > reads.noSSRs.bam
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
144
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
145 -----
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
146
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
147 .. class:: infomark
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
148
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
149 Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of IntersectBed.
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
150
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
151 </help>
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
152
2ec3ba0e9e70 Uploaded
alermine
parents:
diff changeset
153 </tool>