diff [APliBio]Nebula tools suite/Nebula/IntersectBed/bedtools_intersectBed.xml @ 0:2ec3ba0e9e70 draft

Uploaded
author alermine
date Thu, 25 Oct 2012 08:18:25 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/[APliBio]Nebula tools suite/Nebula/IntersectBed/bedtools_intersectBed.xml	Thu Oct 25 08:18:25 2012 -0400
@@ -0,0 +1,153 @@
+<tool id="bedtools_intersectBed" name="IntersectBed" version="v2.10.0">
+  <description>Report overlaps between two feature files</description>
+  <!-- Command template (Cheetah). The A-file type chosen in the "typeA"
+       conditional selects both the input flag (-abam for BAM, -a otherwise)
+       and the output dataset that receives stdout. The report option is only
+       passed when file A is not BAM. -->
+  <command>bedtools intersect
+  $invert $count
+  -f $min_overlap
+  #if str($typeA.file_typeA) == "bam"
+  -abam $typeA.fileA -b $typeB.fileB > $outbamfile;
+  #elif str($typeA.file_typeA) == "bed"
+  $report.type_report -a $typeA.fileA -b $typeB.fileB > $outbedfile;
+  #elif str($typeA.file_typeA) == "gff"
+  $report.type_report -a $typeA.fileA -b $typeB.fileB > $outgfffile;
+  #elif str($typeA.file_typeA) == "vcf"
+  $report.type_report -a $typeA.fileA -b $typeB.fileB > $outvcffile;
+  #end if
+  </command>
+  <inputs>
+    
+    <!-- File A: the selected type decides which dataset format is offered
+         for the "fileA" input. Branches are listed in the same order as the
+         options. -->
+    <conditional name="typeA">
+      <param name="file_typeA" type="select" label="Select the file A type to intersect">
+        <option value="bed">BED (.bed)</option>
+        <option value="bam">BAM (.bam)</option>
+        <option value="vcf">VCF (.vcf)</option>
+        <option value="gff">GFF (.gff)</option>
+      </param>
+      <when value="bed">
+        <param name="fileA" type="data" format="bed" label="BED file"/>
+      </when>
+      <when value="bam">
+        <param name="fileA" type="data" format="bam" label="BAM file"/>
+      </when>
+      <when value="vcf">
+        <param name="fileA" type="data" format="vcf" label="VCF file"/>
+      </when>
+      <when value="gff">
+        <param name="fileA" type="data" format="gff" label="GFF file"/>
+      </when>
+    </conditional>
+    
+    <!-- File B: the selected type decides which dataset format is offered
+         for the "fileB" input. BAM is not offered for file B. -->
+    <conditional name="typeB">
+      <param name="file_typeB" type="select" label="Select the file B type to intersect">
+        <option value="bed">BED (.bed)</option>
+        <option value="vcf">VCF (.vcf)</option>
+        <option value="gff">GFF (.gff)</option>
+      </param>
+      <when value="bed">
+        <param name="fileB" type="data" format="bed" label="BED file"/>
+      </when>
+      <when value="vcf">
+        <param name="fileB" type="data" format="vcf" label="VCF file"/>
+      </when>
+      <when value="gff">
+        <param name="fileB" type="data" format="gff" label="GFF file"/>
+      </when>
+    </conditional>
+    
+    <!-- Report style. The selected value is inserted verbatim into the
+         command line as a bedtools flag (empty string = no flag). No branch
+         carries extra parameters, so every "when" is empty. The command
+         template does not pass this value when file A is BAM. -->
+    <conditional name="report">
+      <param name="type_report" type="select" label="Select the type of report" help="not used if the file A type is BAM">
+        <option value="">write the base-pair overlap between A and B</option>
+        <option value="-wa">write the original entry in A for each overlap (-wa)</option>
+        <!--  <option value="-wb">-wb : write the original entry in B for each overlap</option> -->
+        <option value="-wo">write the original A and B entries plus the number of base pairs of overlap between the two features; only A features with overlap are reported (-wo)</option>
+        <option value="-wao">write the original A and B entries plus the number of base pairs of overlap between the two features; A features without overlap are also reported, with a NULL B feature and overlap = 0 (-wao)</option>
+      </param>
+      <when value=""/>
+      <when value="-wa"/>
+      <!-- <when value="-wb">
+      </when> -->
+      <when value="-wo"/>
+      <when value="-wao"/>
+    </conditional>
+    
+    <!-- Boolean switches: the true value is the bedtools flag that is
+         inserted into the command line, the false value inserts nothing. -->
+    <param name="invert" type="boolean" truevalue="-v" falsevalue="" checked="false" label="Only report those entries in A that have _no overlaps_ with B?"/>
+    <param name="count" type="boolean" truevalue="-c" falsevalue="" checked="false" label="For each entry in A, report the number of overlaps with B?"/>
+
+    <!-- Passed to bedtools as "-f". It is a fraction of feature A, so the
+         form restricts it to the range 0..1 instead of letting an
+         out-of-range value reach the command line. -->
+    <param name="min_overlap" type="float" size="10" value="0.05" min="0" max="1" label="Minimum overlap required as a fraction of A" help="5% by default"/>
+    <!-- Only used to build the labels of the output datasets. -->
+    <param name="file_name" type="text" size="20" value="sample" label="File name (without file extension)"/>
+  </inputs>
+  
+  <!-- One output dataset per possible file A type. The filters are mutually
+       exclusive, so only the dataset whose format matches the selected
+       file A type is created; the command template redirects stdout to it. -->
+  <outputs>
+    <data format="bam" name="outbamfile" label="${file_name}.bam">
+      <filter>typeA['file_typeA']=="bam"</filter>
+    </data>
+    <data format="bed" name="outbedfile" label="${file_name}.bed">
+      <filter>typeA['file_typeA']=="bed"</filter>
+    </data>
+    <data format="gff" name="outgfffile" label="${file_name}.gff">
+      <filter>typeA['file_typeA']=="gff"</filter>
+    </data>
+    <data format="vcf" name="outvcffile" label="${file_name}.vcf">
+      <filter>typeA['file_typeA']=="vcf"</filter>
+    </data>
+  </outputs>
+  
+  <help>
+**What it does**
+
+This tool uses the "intersectBed" function of Bedtools to report overlaps between two feature files.
+
+Note: When intersecting SNPs, make sure the coordinates conform to the UCSC format.
+That is, the start position for each SNP should be SNP position - 1 and the end position should be SNP position. E.g. chr7 10000001 10000002 rs123464
+
+    * Report the base-pair overlap between sequence alignments and genes. 
+      $ intersectBed -a reads.bed -b genes.bed 
+    * Report whether each alignment overlaps one or more genes. If not, the alignment is not reported. 
+      $ intersectBed -a reads.bed -b genes.bed -u 
+    * Report those alignments that overlap NO genes. Like "grep -v" 
+      $ intersectBed -a reads.bed -b genes.bed -v 
+    * Report the number of genes that each alignment overlaps. 
+      $ intersectBed -a reads.bed -b genes.bed -c 
+    * Report the entire, original alignment entry for each overlap with a gene. 
+      $ intersectBed -a reads.bed -b genes.bed -wa 
+    * Report the entire, original alignment and genes entries for each overlap plus the number of base pairs. Only reads features with overlap are reported. 
+      $ intersectBed -a reads.bed -b genes.bed -wo 
+    * Report the entire, original alignment and genes entries for each overlap plus the number of base pairs.  However, alignment features w/o overlap are also reported with a NULL gene feature and overlap = 0.
+      $ intersectBed -a reads.bed -b genes.bed -wao 
+    * Only report an overlap with a repeat if it spans at least 50% of the exon. 
+      $ intersectBed -a exons.bed -b repeatMasker.bed -f 0.50 
+    * Only report an overlap if it comprises 50% of the structural variant and 50% of the segmental duplication. Thus, it is reciprocally at least a 50% overlap. 
+      $ intersectBed -a SV.bed -b segmentalDups.bed -f 0.50 -r 
+    * Read BED A from stdin. For example, find genes that overlap LINEs but not SINEs. 
+      $ intersectBed -a genes.bed -b LINES.bed | intersectBed -a stdin -b SINEs.bed -v 
+    * Retain only single-end BAM alignments that overlap exons. 
+      $ intersectBed -abam reads.bam -b exons.bed > reads.touchingExons.bam 
+    * Retain only single-end BAM alignments that do not overlap simple sequence repeats. 
+      $ intersectBed -abam reads.bam -b SSRs.bed -v > reads.noSSRs.bam
+      
+-----
+
+.. class:: infomark
+
+Contact Bruno Zeitouni (bruno.zeitouni@curie.fr) for any questions or concerns about the Galaxy implementation of IntersectBed.
+      
+  </help>
+
+</tool>