Repository 'probecoverage'
hg clone https://toolshed.g2.bx.psu.edu/repos/artbio/probecoverage

Changeset 2:35d2db3753d9 (2017-09-24)
Previous changeset 1:ebe5ec2e244d (2017-09-23) Next changeset 3:4f744d3aaf0b (2017-09-24)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29
modified:
probecoverage.xml
test-data/graph.pdf
added:
multicov.py
test-data/coverage_pysam.tab
test-data/graph_pysam.pdf
b
diff -r ebe5ec2e244d -r 35d2db3753d9 multicov.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/multicov.py Sun Sep 24 13:34:16 2017 -0400
[
@@ -0,0 +1,60 @@
+import argparse
+
+import numpy
+
+import pysam
+
+
+def Parser():
+    """Parse the command line and return the resulting namespace.
+
+    Both options are required: -bams/--bams takes one or more BAM paths
+    (stored as a list in `bams`), -bed/--bed takes a single BED path
+    (stored in `bed`).
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        '-bams', '--bams', dest='bams', required=True, nargs='+',
+        help='list of input BAM files')
+    cli.add_argument(
+        '-bed', '--bed', dest='bed', required=True,
+        help='Coordinates of probes in a bed file')
+    return cli.parse_args()
+
+
+def compute_coverage(bam, bed, quality=10):
+    """Return the mean per-base coverage of each BED region in one BAM file.
+
+    For every BED line that does not start with '#', the four arrays
+    returned by pysam's count_coverage are summed position by position
+    and the mean of those sums over the region is recorded.
+
+    Args:
+        bam: path to the BAM file to query.
+        bed: path to a tab-separated file whose first three columns are
+            reference name, start and end.
+        quality: value forwarded to count_coverage as quality_threshold.
+
+    Returns:
+        A list holding one mean coverage value per retained BED line, in
+        file order.
+    """
+    coverage_column = []
+    bam_object = pysam.AlignmentFile(bam, 'rb')
+    try:
+        with open(bed, 'r') as bed_object:
+            for line in bed_object:
+                if line[0] == '#':
+                    continue
+                # rstrip instead of line[:-1]: a final line without a
+                # trailing newline must not lose the last digit of its
+                # end coordinate.
+                fields = line.rstrip('\n').split('\t')
+                chrom = fields[0]
+                start = fields[1]
+                end = fields[2]
+                # NOTE(review): BED starts are normally 0-based already,
+                # so the "-1" looks like it adds one upstream base and
+                # yields -1 for regions starting at 0 - confirm intent.
+                coverage = bam_object.count_coverage(
+                    reference=chrom,
+                    start=int(start) - 1,
+                    end=int(end),
+                    quality_threshold=quality)
+                # Add the 4 coverage values found at each position.
+                depth = [sum(x) for x in zip(*coverage)]
+                coverage_column.append(numpy.mean(depth))
+    finally:
+        # Release the BAM handle even when a region lookup fails.
+        bam_object.close()
+    return coverage_column
+
+
+def main(bams, bed):
+    """Print the BED file with one mean-coverage column per BAM file.
+
+    compute_coverage is called once per entry of `bams`; each BED line
+    that does not start with '#' is then written to standard output,
+    followed by the tab-separated coverage values in the order of `bams`.
+
+    Args:
+        bams: sequence of BAM file paths.
+        bed: path to the BED file describing the regions.
+    """
+    columns = [compute_coverage(bam, bed) for bam in bams]
+    # The context manager closes the BED handle, which was previously
+    # left open until interpreter exit.
+    with open(bed, 'r') as bed_handle:
+        row = 0
+        for line in bed_handle:
+            # Same filter as compute_coverage, so `row` stays aligned
+            # with the entries of every coverage column.
+            if line[0] == '#':
+                continue
+            # rstrip instead of line[:-1]: keep the last character of a
+            # final line that has no trailing newline.
+            prefix = line.rstrip('\n')
+            suffix = '\t'.join([str(column[row]) for column in columns])
+            row += 1
+            print('%s\t%s' % (prefix, suffix))
+
+
+if __name__ == "__main__":
+    # Script entry point: read the command line, then print the table.
+    cli_args = Parser()
+    main(bams=cli_args.bams, bed=cli_args.bed)
b
diff -r ebe5ec2e244d -r 35d2db3753d9 probecoverage.xml
--- a/probecoverage.xml Sat Sep 23 12:56:17 2017 -0400
+++ b/probecoverage.xml Sun Sep 24 13:34:16 2017 -0400
b
@@ -1,11 +1,12 @@
-<tool id="probecoverage" name="Probe Coverage" version="0.2.0">
+<tool id="probecoverage" name="Probe Coverage" version="0.3.0">
   <description></description>
   <requirements>
         <requirement type="package" version="1.4.1">samtools</requirement>
         <requirement type="package" version="2.26.0">bedtools</requirement>
         <requirement type="package" version="1.3.2=r3.3.1_0">r-optparse</requirement>
         <requirement type="package" version="2.2.1=r3.3.1_0">r-ggplot2</requirement>
-        <requirement type="package" version="1.4.2=r3.3.1_0">r-reshape2</requirement>
+        <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
+        <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement>
   </requirements>
   <stdio>
       <exit_code range="1:" level="fatal" description="Tool exception" />
@@ -14,16 +15,25 @@
       #for $file in $inputs
           samtools index '$file' &&
       #end for
-      bedtools multicov
+      #if $method == 'pysam':
+          python $__tool_directory__/multicov.py
+      #else:
+          bedtools multicov
+      #end if
           -bams
               #for $file in $inputs
                   '$file'
               #end for
-          -bed '$bed' > $bedtools_table &&
+          -bed '$bed' > $coverage_dataframe &&
       Rscript '$__tool_directory__'/probecoverage.r
-          --input '$bedtools_table' 
+          --input '$coverage_dataframe' 
           --title 'Probe coverage depth (cumulative distribution)'
-          --xlab 'Depth'
+          --xlab
+          #if $method == 'pysam':
+              'Depth (pysam method)'
+          #else:
+              'Depth (bedtools muticov method)'
+          #end if
           --ylab 'Fraction of covered regions with coverage >= Depth'
           --output '$distribution'
           --sample "
@@ -33,13 +43,16 @@
               "
               
   ]]></command>
- <inputs>
-   <param name="inputs" type="data" format="bam" label="Select multiple Bam alignments to parse" multiple="True"/>
-   <param name="bed" type="data" format="bed" label="Select a bed file describing the genomic regions to analyze" />
- </inputs>
+<inputs>
+    <param name="inputs" type="data" format="bam" label="Select multiple Bam alignments to parse" multiple="True"/>
+    <param name="bed" type="data" format="bed" label="Select a bed file describing the genomic regions to analyze" />
+    <param name="method" type="boolean" falsevalue="bedtools" checked="false" truevalue="pysam"
+           help="coverage is computed using bedtools multicov (default) or pysam module"
+           label="Compute coverage with pysam (bedtools by default)" />
+</inputs>
 
  <outputs>
-   <data format="tabular" name="bedtools_table" label="bedtools multicov output" />
+   <data format="tabular" name="coverage_dataframe" label="coverage table" />
    <data format="pdf" name="distribution" label="Cumulative distribution of region coverages" />
 </outputs>
 
@@ -47,9 +60,17 @@
         <test>
             <param name="inputs" value="sample1,sample2,sample3" ftype="bam"/>
             <param name="bed" value="probes.bed" ftype="bed"/>
-            <output file="coverage.tab" name="bedtools_table" />
+            <param name="method" value="bedtools" />
+            <output file="coverage.tab" name="coverage_dataframe" />
             <output file="graph.pdf" name="distribution" />
         </test>
+        <test>
+            <param name="inputs" value="sample1,sample2,sample3" ftype="bam"/>
+            <param name="bed" value="probes.bed" ftype="bed"/>
+            <param name="method" value="pysam" />
+            <output file="coverage_pysam.tab" name="coverage_dataframe" />
+            <output file="graph_pysam.pdf" name="distribution" />
+        </test>
     </tests>
 
 
b
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/coverage_pysam.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/coverage_pysam.tab Sun Sep 24 13:34:16 2017 -0400
b
b'@@ -0,0 +1,781 @@\n+chr1\t21835833\t21836044\tuc001bet.3_exon_0_0_chr1_21835858_f;uc010odn.2_exon_0_0_chr1_21835858_f;uc010odo.2_exon_0_0_chr1_21835858_f;uc010odp.2_exon_0_0_chr1_21835858_f\t0.0\t0.0\t0.0\n+chr1\t21877748\t21877951\tuc001beu.4_exon_0_0_chr1_21877772_f\t0.0\t0.0\t0.0\n+chr1\t21880448\t21880653\tuc001bet.3_exon_1_0_chr1_21880471_f;uc001beu.4_exon_1_0_chr1_21880471_f;uc010odn.2_exon_1_0_chr1_21880471_f\t0.0\t0.0\t0.0\n+chr1\t21887093\t21887264\tuc001bet.3_exon_2_0_chr1_21887119_f;uc001beu.4_exon_2_0_chr1_21887119_f;uc010odn.2_exon_2_0_chr1_21887119_f;uc010odo.2_exon_1_0_chr1_21887119_f;uc010odp.2_exon_1_0_chr1_21887119_f\t0.0\t0.0\t0.0\n+chr1\t21887563\t21887740\tuc001bet.3_exon_3_0_chr1_21887590_f;uc001beu.4_exon_3_0_chr1_21887590_f;uc010odn.2_exon_3_0_chr1_21887631_f;uc010odo.2_exon_2_0_chr1_21887590_f\t0.0\t0.0\t0.0\n+chr1\t21889573\t21889781\tuc001bet.3_exon_4_0_chr1_21889603_f;uc001beu.4_exon_4_0_chr1_21889603_f;uc010odn.2_exon_4_0_chr1_21889603_f;uc010odo.2_exon_3_0_chr1_21889603_f;uc010odp.2_exon_2_0_chr1_21889603_f\t0.0\t0.0\t0.0\n+chr1\t21890498\t21890751\tuc001bet.3_exon_5_0_chr1_21890534_f;uc001beu.4_exon_5_0_chr1_21890534_f;uc010odn.2_exon_5_0_chr1_21890534_f;uc010odo.2_exon_4_0_chr1_21890534_f;uc010odp.2_exon_3_0_chr1_21890534_f\t0.0\t0.0\t0.0\n+chr1\t21894568\t21894783\tuc001bet.3_exon_6_0_chr1_21894597_f;uc001beu.4_exon_6_0_chr1_21894597_f;uc010odn.2_exon_6_0_chr1_21894597_f;uc010odo.2_exon_5_0_chr1_21894597_f;uc010odp.2_exon_4_0_chr1_21894597_f\t0.0\t0.0\t0.0\n+chr1\t21896758\t21896886\tuc001bet.3_exon_7_0_chr1_21896798_f;uc001beu.4_exon_7_0_chr1_21896798_f;uc010odn.2_exon_7_0_chr1_21896798_f;uc010odo.2_exon_6_0_chr1_21896798_f;uc010odp.2_exon_5_0_chr1_21896798_f\t0.0\t0.0\t0.0\n+chr1\t21900133\t21900300\tuc001bet.3_exon_8_0_chr1_21900158_f;uc001beu.4_exon_8_0_chr1_21900158_f;uc010odn.2_exon_8_0_chr1_21900158_f;uc010odo.2_exon_7_0_chr1_21900158_f;uc010odp.2_exon_6_0_chr1_21900158_f\t0.0\t0.0\t0.0\n+chr1\t21902193\t21902437\tuc001bet.3
_exon_9_0_chr1_21902226_f;uc001beu.4_exon_9_0_chr1_21902226_f;uc010odn.2_exon_9_0_chr1_21902226_f;uc010odo.2_exon_8_0_chr1_21902226_f;uc010odp.2_exon_7_0_chr1_21902226_f\t0.0\t0.0\t0.0\n+chr1\t21902993\t21903162\tuc001bet.3_exon_10_0_chr1_21903015_f;uc001beu.4_exon_10_0_chr1_21903015_f;uc010odn.2_exon_10_0_chr1_21903015_f;uc010odo.2_exon_9_0_chr1_21903015_f;uc010odp.2_exon_8_0_chr1_21903015_f\t0.0\t0.0\t0.0\n+chr1\t21903853\t21904830\tuc001bet.3_exon_11_0_chr1_21903876_f;uc001beu.4_exon_11_0_chr1_21903876_f;uc010odn.2_exon_11_0_chr1_21903876_f;uc010odo.2_exon_10_0_chr1_21903876_f;uc010odp.2_exon_9_0_chr1_21903876_f\t0.0\t0.0\t0.0\n+chr1\t21904843\t21904938\tuc001bet.3_exon_11_0_chr1_21903876_f;uc001beu.4_exon_11_0_chr1_21903876_f;uc010odn.2_exon_11_0_chr1_21903876_f;uc010odo.2_exon_10_0_chr1_21903876_f;uc010odp.2_exon_9_0_chr1_21903876_f\t0.0\t0.0\t0.0\n+chr1\t43199039\t43201740\tuc001cht.1_exon_0_0_chr1_43198764_r;uc001cht.1_exon_1_0_chr1_43201549_r;uc001chu.2_exon_0_0_chr1_43198764_r;uc010ojv.1_exon_0_0_chr1_43198764_r\t0.0\t0.0\t0.0\n+chr1\t43203864\t43204005\tuc001cht.1_exon_2_0_chr1_43203900_r;uc001chu.2_exon_1_0_chr1_43203900_r\t0.0\t0.0\t0.0\n+chr1\t43204059\t43204281\tuc001cht.1_exon_3_0_chr1_43204092_r;uc001chu.2_exon_2_0_chr1_43204092_r;uc010ojv.1_exon_1_0_chr1_43204092_r\t0.0\t0.0\t0.0\n+chr1\t43205484\t43205942\tuc001cht.1_exon_4_0_chr1_43205512_r;uc001chu.2_exon_3_0_chr1_43205512_r;uc010ojv.1_exon_2_0_chr1_43205512_r\t0.0\t0.0\t0.0\n+chr1\t156211919\t156212103\tuc001fnt.3_exon_0_0_chr1_156211951_f\t0.0\t0.0\t0.0\n+chr1\t156212284\t156212426\tuc001fnt.3_exon_1_0_chr1_156212344_f\t0.0\t0.0\t0.0\n+chr1\t156212514\t156212654\tuc001fnt.3_exon_2_0_chr1_156212553_f\t0.0\t0.0\t0.0\n+chr1\t156212799\t156213150\tuc001fnt.3_exon_3_0_chr1_156212824_f\t0.0\t0.0\t0.0\n+chr1\t165370129\t165370662\tuc001gda.3_exon_0_0_chr1_165370159_r;uc021pea.1_exon_0_0_chr1_165370159_r;uc031prc.1_exon_0_0_chr1_165370159_r\t0.0\t0.0\t0.0\n+chr1\t165376014\t165376193\tuc001gda.3_exon_1
_0_chr1_165376049_r;uc021pea.1_exon_1_0_chr1_165376049_r;uc031prc.1_exon_1_0_chr1_165376049_r\t0.0\t0.0\t0.0\n+chr1\t165377434\t165377569\tuc001gda.3_exon_2_0_chr1_165377464'..b'60597\t49860902\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49860907\t49860973\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49861267\t49861657\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49861667\t49862218\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49862222\t49863932\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t128674224\t128674478\tuc004euq.3_exon_0_0_chrX_128674252_f;uc004eur.3_exon_0_0_chrX_128674252_f\t0.0\t0.0\t0.0\n+chrX\t128674679\t128674822\tuc004euq.3_exon_1_0_chrX_128674721_f;uc004eur.3_exon_1_0_chrX_128674721_f\t0.0\t0.0\t0.0\n+chrX\t128678889\t128679038\tuc004euq.3_exon_2_0_chrX_128678935_f;uc004eur.3_exon_2_0_chrX_128678935_f\t0.0\t0.0\t0.0\n+chrX\t128682479\t128682621\tuc004euq.3_exon_3_0_chrX_128682540_f;uc004eur.3_exon_3_0_chrX_128682540_f\t0.0\t0.0\t0.0\n+chrX\t128691279\t128691450\tuc004euq.3_exon_4_0_chrX_128691302_f;uc004eur.3_exon_4_0_chrX_128691302_f\t0.0\t0.0\t0.0\n+chrX\t128691809\t128691945\tuc004euq.3_exon_5_0_chrX_128691838_f;uc004eur.3_exon_5_0_chrX_128691
838_f\t0.0\t0.0\t0.0\n+chrX\t128692584\t128692748\tuc004euq.3_exon_6_0_chrX_128692610_f;uc004eur.3_exon_6_0_chrX_128692610_f\t0.0\t0.0\t0.0\n+chrX\t128692794\t128693002\tuc004euq.3_exon_7_0_chrX_128692817_f;uc004eur.3_exon_7_0_chrX_128692817_f\t0.0\t0.0\t0.0\n+chrX\t128694504\t128694640\tuc004euq.3_exon_8_0_chrX_128694527_f;uc004eur.3_exon_8_0_chrX_128694527_f\t0.0\t0.0\t0.0\n+chrX\t128695129\t128695300\tuc004euq.3_exon_9_0_chrX_128695156_f;uc004eur.3_exon_9_0_chrX_128695156_f\t0.0\t0.0\t0.0\n+chrX\t128696329\t128696513\tuc004euq.3_exon_10_0_chrX_128696361_f;uc004eur.3_exon_10_0_chrX_128696361_f\t0.0\t0.0\t0.0\n+chrX\t128696554\t128696799\tuc004euq.3_exon_11_0_chrX_128696576_f;uc004eur.3_exon_11_0_chrX_128696576_f\t0.0\t0.0\t0.0\n+chrX\t128699724\t128699891\tuc004euq.3_exon_12_0_chrX_128699749_f;uc004eur.3_exon_12_0_chrX_128699749_f\t0.0\t0.0\t0.0\n+chrX\t128701204\t128701373\tuc004euq.3_exon_13_0_chrX_128701231_f;uc004eur.3_exon_13_0_chrX_128701231_f\t0.0\t0.0\t0.0\n+chrX\t128703209\t128703395\tuc004euq.3_exon_14_0_chrX_128703241_f;uc004eur.3_exon_14_0_chrX_128703241_f\t0.0\t0.0\t0.0\n+chrX\t128709089\t128709193\tuc004euq.3_exon_15_0_chrX_128709117_f;uc004eur.3_exon_15_0_chrX_128709117_f\t0.0\t0.0\t0.0\n+chrX\t128709839\t128710056\tuc004euq.3_exon_16_0_chrX_128709874_f;uc004eur.3_exon_16_0_chrX_128709874_f\t0.0\t0.0\t0.0\n+chrX\t128710269\t128710542\tuc004euq.3_exon_17_0_chrX_128710294_f;uc004eur.3_exon_17_0_chrX_128710294_f\t0.0\t0.0\t0.0\n+chrX\t128718259\t128718394\tuc004euq.3_exon_18_0_chrX_128718321_f\t0.0\t0.0\t0.0\n+chrX\t128720954\t128721127\tuc004euq.3_exon_19_0_chrX_128720979_f;uc004eur.3_exon_18_0_chrX_128720979_f\t0.0\t0.0\t0.0\n+chrX\t128722114\t128722271\tuc004euq.3_exon_20_0_chrX_128722156_f;uc004eur.3_exon_19_0_chrX_128722156_f\t0.0\t0.0\t0.0\n+chrX\t128722829\t128723018\tuc004euq.3_exon_21_0_chrX_128722863_f;uc004eur.3_exon_20_0_chrX_128722863_f;uc010nrb.3_exon_0_0_chrX_128722863_f\t0.0\t0.0\t0.0\n+chrX\t128723789\t128723974\tuc004euq.3_exon_22_0_c
hrX_128723822_f;uc004eur.3_exon_21_0_chrX_128723822_f\t0.0\t0.0\t0.0\n+chrX\t128724099\t128725469\tuc004euq.3_exon_23_0_chrX_128724123_f;uc004eur.3_exon_22_0_chrX_128724123_f;uc010nrb.3_exon_1_0_chrX_128724123_f\t0.0\t0.0\t0.0\n+chrX\t128725474\t128726564\tuc004euq.3_exon_23_0_chrX_128724123_f;uc004eur.3_exon_22_0_chrX_128724123_f;uc010nrb.3_exon_1_0_chrX_128724123_f\t0.0\t0.0\t0.0\n'
b
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/graph.pdf
b
Binary file test-data/graph.pdf has changed
b
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/graph_pysam.pdf
b
Binary file test-data/graph_pysam.pdf has changed