Previous changeset 1:ebe5ec2e244d (2017-09-23) Next changeset 3:4f744d3aaf0b (2017-09-24) |
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/probecoverage commit 301fc26e062ac02a28676a05aa9c82e4407e3d29 |
modified:
probecoverage.xml test-data/graph.pdf |
added:
multicov.py test-data/coverage_pysam.tab test-data/graph_pysam.pdf |
b |
diff -r ebe5ec2e244d -r 35d2db3753d9 multicov.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/multicov.py Sun Sep 24 13:34:16 2017 -0400 |
[ |
import argparse

import numpy

import pysam


def Parser():
    """Parse the command line and return the populated argparse namespace.

    Options:
        -bams / --bams: one or more input BAM files (required).
        -bed / --bed: BED file holding the probe coordinates (required).
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('-bams', '--bams', dest='bams', required=True,
                            nargs='+', help='list of input BAM files')
    the_parser.add_argument('-bed', '--bed', dest='bed', required=True,
                            help='Coordinates of probes in a bed file')
    return the_parser.parse_args()


def _bed_records(bed):
    """Yield every data line of the BED file *bed*, line terminator removed.

    Blank lines and lines starting with '#' are skipped.  Both
    compute_coverage() and main() iterate through this helper so that the
    n-th coverage value always belongs to the n-th printed region.
    """
    with open(bed, 'r') as handle:
        for line in handle:
            # Strip only the terminator: slicing with [:-1] would eat the
            # last real character of a final line lacking a newline.
            record = line.rstrip('\r\n')
            if not record or record.startswith('#'):
                continue
            yield record


def compute_coverage(bam, bed, quality=10):
    """Return the mean depth of each BED region in a single BAM file.

    Args:
        bam: path to a BAM file (opened with pysam in 'rb' mode).
        bed: path to a tab-separated BED file; the first three fields
            (chromosome, start, end) are used.
        quality: value forwarded to pysam as ``quality_threshold``.

    Returns:
        A list with one mean coverage value per BED data line, in file
        order.
    """
    coverage_column = []
    bam_object = pysam.AlignmentFile(bam, 'rb')
    try:
        for record in _bed_records(bed):
            fields = record.split('\t')
            chrom = fields[0]
            # NOTE(review): the window starts one base before the BED start,
            # as in the initial implementation - confirm this is intended.
            # It is clamped at 0 so a region starting at 0 does not ask
            # pysam for a negative coordinate.
            start = max(int(fields[1]) - 1, 0)
            end = int(fields[2])
            coverage = bam_object.count_coverage(reference=chrom,
                                                 start=start,
                                                 end=end,
                                                 quality_threshold=quality)
            # Add the 4 coverage values (one array per nucleotide) to get
            # the total depth at each position of the window.
            depth = [sum(counts) for counts in zip(*coverage)]
            coverage_column.append(numpy.mean(depth))
    finally:
        # Release the BAM handle even if a region fails to parse.
        bam_object.close()
    return coverage_column


def main(bams, bed):
    """Print the BED file with one extra mean-coverage column per BAM file.

    Args:
        bams: list of BAM file paths; columns are emitted in this order.
        bed: path to the BED file describing the regions.

    Each BED data line is written to stdout followed by the tab-separated
    coverage values computed by compute_coverage().
    """
    columns = [compute_coverage(bam, bed) for bam in bams]
    for row, prefix in enumerate(_bed_records(bed)):
        suffix = '\t'.join(str(column[row]) for column in columns)
        print('%s\t%s' % (prefix, suffix))


if __name__ == "__main__":
    args = Parser()
    main(args.bams, args.bed)
b |
diff -r ebe5ec2e244d -r 35d2db3753d9 probecoverage.xml --- a/probecoverage.xml Sat Sep 23 12:56:17 2017 -0400 +++ b/probecoverage.xml Sun Sep 24 13:34:16 2017 -0400 |
b |
@@ -1,11 +1,12 @@ -<tool id="probecoverage" name="Probe Coverage" version="0.2.0"> +<tool id="probecoverage" name="Probe Coverage" version="0.3.0"> <description></description> <requirements> <requirement type="package" version="1.4.1">samtools</requirement> <requirement type="package" version="2.26.0">bedtools</requirement> <requirement type="package" version="1.3.2=r3.3.1_0">r-optparse</requirement> <requirement type="package" version="2.2.1=r3.3.1_0">r-ggplot2</requirement> - <requirement type="package" version="1.4.2=r3.3.1_0">r-reshape2</requirement> + <requirement type="package" version="1.11.2=py27_0">numpy</requirement> + <requirement type="package" version="0.11.2.1=py27_0">pysam</requirement> </requirements> <stdio> <exit_code range="1:" level="fatal" description="Tool exception" /> @@ -14,16 +15,25 @@ #for $file in $inputs samtools index '$file' && #end for - bedtools multicov + #if $method == 'pysam': + python $__tool_directory__/multicov.py + #else: + bedtools multicov + #end if -bams #for $file in $inputs '$file' #end for - -bed '$bed' > $bedtools_table && + -bed '$bed' > $coverage_dataframe && Rscript '$__tool_directory__'/probecoverage.r - --input '$bedtools_table' + --input '$coverage_dataframe' --title 'Probe coverage depth (cumulative distribution)' - --xlab 'Depth' + --xlab + #if $method == 'pysam': + 'Depth (pysam method)' + #else: + 'Depth (bedtools muticov method)' + #end if --ylab 'Fraction of covered regions with coverage >= Depth' --output '$distribution' --sample " @@ -33,13 +43,16 @@ " ]]></command> - <inputs> - <param name="inputs" type="data" format="bam" label="Select multiple Bam alignments to parse" multiple="True"/> - <param name="bed" type="data" format="bed" label="Select a bed file describing the genomic regions to analyze" /> - </inputs> +<inputs> + <param name="inputs" type="data" format="bam" label="Select multiple Bam alignments to parse" multiple="True"/> + <param name="bed" type="data" format="bed" label="Select a bed file 
describing the genomic regions to analyze" /> + <param name="method" type="boolean" falsevalue="bedtools" checked="false" truevalue="pysam" + help="coverage is computed using bedtools multicov (default) or pysam module" + label="Compute coverage with pysam (bedtools by default)" /> +</inputs> <outputs> - <data format="tabular" name="bedtools_table" label="bedtools multicov output" /> + <data format="tabular" name="coverage_dataframe" label="coverage table" /> <data format="pdf" name="distribution" label="Cumulative distribution of region coverages" /> </outputs> @@ -47,9 +60,17 @@ <test> <param name="inputs" value="sample1,sample2,sample3" ftype="bam"/> <param name="bed" value="probes.bed" ftype="bed"/> - <output file="coverage.tab" name="bedtools_table" /> + <param name="method" value="bedtools" /> + <output file="coverage.tab" name="coverage_dataframe" /> <output file="graph.pdf" name="distribution" /> </test> + <test> + <param name="inputs" value="sample1,sample2,sample3" ftype="bam"/> + <param name="bed" value="probes.bed" ftype="bed"/> + <param name="method" value="pysam" /> + <output file="coverage_pysam.tab" name="coverage_dataframe" /> + <output file="graph_pysam.pdf" name="distribution" /> + </test> </tests> |
b |
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/coverage_pysam.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/coverage_pysam.tab Sun Sep 24 13:34:16 2017 -0400 |
b |
b'@@ -0,0 +1,781 @@\n+chr1\t21835833\t21836044\tuc001bet.3_exon_0_0_chr1_21835858_f;uc010odn.2_exon_0_0_chr1_21835858_f;uc010odo.2_exon_0_0_chr1_21835858_f;uc010odp.2_exon_0_0_chr1_21835858_f\t0.0\t0.0\t0.0\n+chr1\t21877748\t21877951\tuc001beu.4_exon_0_0_chr1_21877772_f\t0.0\t0.0\t0.0\n+chr1\t21880448\t21880653\tuc001bet.3_exon_1_0_chr1_21880471_f;uc001beu.4_exon_1_0_chr1_21880471_f;uc010odn.2_exon_1_0_chr1_21880471_f\t0.0\t0.0\t0.0\n+chr1\t21887093\t21887264\tuc001bet.3_exon_2_0_chr1_21887119_f;uc001beu.4_exon_2_0_chr1_21887119_f;uc010odn.2_exon_2_0_chr1_21887119_f;uc010odo.2_exon_1_0_chr1_21887119_f;uc010odp.2_exon_1_0_chr1_21887119_f\t0.0\t0.0\t0.0\n+chr1\t21887563\t21887740\tuc001bet.3_exon_3_0_chr1_21887590_f;uc001beu.4_exon_3_0_chr1_21887590_f;uc010odn.2_exon_3_0_chr1_21887631_f;uc010odo.2_exon_2_0_chr1_21887590_f\t0.0\t0.0\t0.0\n+chr1\t21889573\t21889781\tuc001bet.3_exon_4_0_chr1_21889603_f;uc001beu.4_exon_4_0_chr1_21889603_f;uc010odn.2_exon_4_0_chr1_21889603_f;uc010odo.2_exon_3_0_chr1_21889603_f;uc010odp.2_exon_2_0_chr1_21889603_f\t0.0\t0.0\t0.0\n+chr1\t21890498\t21890751\tuc001bet.3_exon_5_0_chr1_21890534_f;uc001beu.4_exon_5_0_chr1_21890534_f;uc010odn.2_exon_5_0_chr1_21890534_f;uc010odo.2_exon_4_0_chr1_21890534_f;uc010odp.2_exon_3_0_chr1_21890534_f\t0.0\t0.0\t0.0\n+chr1\t21894568\t21894783\tuc001bet.3_exon_6_0_chr1_21894597_f;uc001beu.4_exon_6_0_chr1_21894597_f;uc010odn.2_exon_6_0_chr1_21894597_f;uc010odo.2_exon_5_0_chr1_21894597_f;uc010odp.2_exon_4_0_chr1_21894597_f\t0.0\t0.0\t0.0\n+chr1\t21896758\t21896886\tuc001bet.3_exon_7_0_chr1_21896798_f;uc001beu.4_exon_7_0_chr1_21896798_f;uc010odn.2_exon_7_0_chr1_21896798_f;uc010odo.2_exon_6_0_chr1_21896798_f;uc010odp.2_exon_5_0_chr1_21896798_f\t0.0\t0.0\t0.0\n+chr1\t21900133\t21900300\tuc001bet.3_exon_8_0_chr1_21900158_f;uc001beu.4_exon_8_0_chr1_21900158_f;uc010odn.2_exon_8_0_chr1_21900158_f;uc010odo.2_exon_7_0_chr1_21900158_f;uc010odp.2_exon_6_0_chr1_21900158_f\t0.0\t0.0\t0.0\n+chr1\t21902193\t21902437\tuc001bet.3
_exon_9_0_chr1_21902226_f;uc001beu.4_exon_9_0_chr1_21902226_f;uc010odn.2_exon_9_0_chr1_21902226_f;uc010odo.2_exon_8_0_chr1_21902226_f;uc010odp.2_exon_7_0_chr1_21902226_f\t0.0\t0.0\t0.0\n+chr1\t21902993\t21903162\tuc001bet.3_exon_10_0_chr1_21903015_f;uc001beu.4_exon_10_0_chr1_21903015_f;uc010odn.2_exon_10_0_chr1_21903015_f;uc010odo.2_exon_9_0_chr1_21903015_f;uc010odp.2_exon_8_0_chr1_21903015_f\t0.0\t0.0\t0.0\n+chr1\t21903853\t21904830\tuc001bet.3_exon_11_0_chr1_21903876_f;uc001beu.4_exon_11_0_chr1_21903876_f;uc010odn.2_exon_11_0_chr1_21903876_f;uc010odo.2_exon_10_0_chr1_21903876_f;uc010odp.2_exon_9_0_chr1_21903876_f\t0.0\t0.0\t0.0\n+chr1\t21904843\t21904938\tuc001bet.3_exon_11_0_chr1_21903876_f;uc001beu.4_exon_11_0_chr1_21903876_f;uc010odn.2_exon_11_0_chr1_21903876_f;uc010odo.2_exon_10_0_chr1_21903876_f;uc010odp.2_exon_9_0_chr1_21903876_f\t0.0\t0.0\t0.0\n+chr1\t43199039\t43201740\tuc001cht.1_exon_0_0_chr1_43198764_r;uc001cht.1_exon_1_0_chr1_43201549_r;uc001chu.2_exon_0_0_chr1_43198764_r;uc010ojv.1_exon_0_0_chr1_43198764_r\t0.0\t0.0\t0.0\n+chr1\t43203864\t43204005\tuc001cht.1_exon_2_0_chr1_43203900_r;uc001chu.2_exon_1_0_chr1_43203900_r\t0.0\t0.0\t0.0\n+chr1\t43204059\t43204281\tuc001cht.1_exon_3_0_chr1_43204092_r;uc001chu.2_exon_2_0_chr1_43204092_r;uc010ojv.1_exon_1_0_chr1_43204092_r\t0.0\t0.0\t0.0\n+chr1\t43205484\t43205942\tuc001cht.1_exon_4_0_chr1_43205512_r;uc001chu.2_exon_3_0_chr1_43205512_r;uc010ojv.1_exon_2_0_chr1_43205512_r\t0.0\t0.0\t0.0\n+chr1\t156211919\t156212103\tuc001fnt.3_exon_0_0_chr1_156211951_f\t0.0\t0.0\t0.0\n+chr1\t156212284\t156212426\tuc001fnt.3_exon_1_0_chr1_156212344_f\t0.0\t0.0\t0.0\n+chr1\t156212514\t156212654\tuc001fnt.3_exon_2_0_chr1_156212553_f\t0.0\t0.0\t0.0\n+chr1\t156212799\t156213150\tuc001fnt.3_exon_3_0_chr1_156212824_f\t0.0\t0.0\t0.0\n+chr1\t165370129\t165370662\tuc001gda.3_exon_0_0_chr1_165370159_r;uc021pea.1_exon_0_0_chr1_165370159_r;uc031prc.1_exon_0_0_chr1_165370159_r\t0.0\t0.0\t0.0\n+chr1\t165376014\t165376193\tuc001gda.3_exon_1
_0_chr1_165376049_r;uc021pea.1_exon_1_0_chr1_165376049_r;uc031prc.1_exon_1_0_chr1_165376049_r\t0.0\t0.0\t0.0\n+chr1\t165377434\t165377569\tuc001gda.3_exon_2_0_chr1_165377464'..b'60597\t49860902\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49860907\t49860973\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49861267\t49861657\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49861667\t49862218\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t49862222\t49863932\tuc004doq.1_exon_14_0_chrX_49856786_f;uc004dor.1_exon_14_0_chrX_49856786_f;uc004dos.1_exon_11_0_chrX_49856786_f;uc004dot.1_exon_10_0_chrX_49856786_f;uc031tjo.1_exon_11_0_chrX_49856786_f\t0.0\t0.0\t0.0\n+chrX\t128674224\t128674478\tuc004euq.3_exon_0_0_chrX_128674252_f;uc004eur.3_exon_0_0_chrX_128674252_f\t0.0\t0.0\t0.0\n+chrX\t128674679\t128674822\tuc004euq.3_exon_1_0_chrX_128674721_f;uc004eur.3_exon_1_0_chrX_128674721_f\t0.0\t0.0\t0.0\n+chrX\t128678889\t128679038\tuc004euq.3_exon_2_0_chrX_128678935_f;uc004eur.3_exon_2_0_chrX_128678935_f\t0.0\t0.0\t0.0\n+chrX\t128682479\t128682621\tuc004euq.3_exon_3_0_chrX_128682540_f;uc004eur.3_exon_3_0_chrX_128682540_f\t0.0\t0.0\t0.0\n+chrX\t128691279\t128691450\tuc004euq.3_exon_4_0_chrX_128691302_f;uc004eur.3_exon_4_0_chrX_128691302_f\t0.0\t0.0\t0.0\n+chrX\t128691809\t128691945\tuc004euq.3_exon_5_0_chrX_128691838_f;uc004eur.3_exon_5_0_chrX_128691
838_f\t0.0\t0.0\t0.0\n+chrX\t128692584\t128692748\tuc004euq.3_exon_6_0_chrX_128692610_f;uc004eur.3_exon_6_0_chrX_128692610_f\t0.0\t0.0\t0.0\n+chrX\t128692794\t128693002\tuc004euq.3_exon_7_0_chrX_128692817_f;uc004eur.3_exon_7_0_chrX_128692817_f\t0.0\t0.0\t0.0\n+chrX\t128694504\t128694640\tuc004euq.3_exon_8_0_chrX_128694527_f;uc004eur.3_exon_8_0_chrX_128694527_f\t0.0\t0.0\t0.0\n+chrX\t128695129\t128695300\tuc004euq.3_exon_9_0_chrX_128695156_f;uc004eur.3_exon_9_0_chrX_128695156_f\t0.0\t0.0\t0.0\n+chrX\t128696329\t128696513\tuc004euq.3_exon_10_0_chrX_128696361_f;uc004eur.3_exon_10_0_chrX_128696361_f\t0.0\t0.0\t0.0\n+chrX\t128696554\t128696799\tuc004euq.3_exon_11_0_chrX_128696576_f;uc004eur.3_exon_11_0_chrX_128696576_f\t0.0\t0.0\t0.0\n+chrX\t128699724\t128699891\tuc004euq.3_exon_12_0_chrX_128699749_f;uc004eur.3_exon_12_0_chrX_128699749_f\t0.0\t0.0\t0.0\n+chrX\t128701204\t128701373\tuc004euq.3_exon_13_0_chrX_128701231_f;uc004eur.3_exon_13_0_chrX_128701231_f\t0.0\t0.0\t0.0\n+chrX\t128703209\t128703395\tuc004euq.3_exon_14_0_chrX_128703241_f;uc004eur.3_exon_14_0_chrX_128703241_f\t0.0\t0.0\t0.0\n+chrX\t128709089\t128709193\tuc004euq.3_exon_15_0_chrX_128709117_f;uc004eur.3_exon_15_0_chrX_128709117_f\t0.0\t0.0\t0.0\n+chrX\t128709839\t128710056\tuc004euq.3_exon_16_0_chrX_128709874_f;uc004eur.3_exon_16_0_chrX_128709874_f\t0.0\t0.0\t0.0\n+chrX\t128710269\t128710542\tuc004euq.3_exon_17_0_chrX_128710294_f;uc004eur.3_exon_17_0_chrX_128710294_f\t0.0\t0.0\t0.0\n+chrX\t128718259\t128718394\tuc004euq.3_exon_18_0_chrX_128718321_f\t0.0\t0.0\t0.0\n+chrX\t128720954\t128721127\tuc004euq.3_exon_19_0_chrX_128720979_f;uc004eur.3_exon_18_0_chrX_128720979_f\t0.0\t0.0\t0.0\n+chrX\t128722114\t128722271\tuc004euq.3_exon_20_0_chrX_128722156_f;uc004eur.3_exon_19_0_chrX_128722156_f\t0.0\t0.0\t0.0\n+chrX\t128722829\t128723018\tuc004euq.3_exon_21_0_chrX_128722863_f;uc004eur.3_exon_20_0_chrX_128722863_f;uc010nrb.3_exon_0_0_chrX_128722863_f\t0.0\t0.0\t0.0\n+chrX\t128723789\t128723974\tuc004euq.3_exon_22_0_c
hrX_128723822_f;uc004eur.3_exon_21_0_chrX_128723822_f\t0.0\t0.0\t0.0\n+chrX\t128724099\t128725469\tuc004euq.3_exon_23_0_chrX_128724123_f;uc004eur.3_exon_22_0_chrX_128724123_f;uc010nrb.3_exon_1_0_chrX_128724123_f\t0.0\t0.0\t0.0\n+chrX\t128725474\t128726564\tuc004euq.3_exon_23_0_chrX_128724123_f;uc004eur.3_exon_22_0_chrX_128724123_f;uc010nrb.3_exon_1_0_chrX_128724123_f\t0.0\t0.0\t0.0\n' |
b |
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/graph.pdf |
b |
Binary file test-data/graph.pdf has changed |
b |
diff -r ebe5ec2e244d -r 35d2db3753d9 test-data/graph_pysam.pdf |
b |
Binary file test-data/graph_pysam.pdf has changed |