Mercurial > repos > iuc > variant_analyzer
annotate mut2read.py @ 2:3f1dbd2c59bf draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
author | iuc |
---|---|
date | Tue, 10 Nov 2020 12:55:29 +0000 |
parents | 3556001ff2db |
children |
rev | line source |
---|---|
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
2 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
3 """mut2read.py |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
4 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
5 Author -- Gundula Povysil |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
6 Contact -- povysil@bioinf.jku.at |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
7 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
8 Takes a tabular file with mutations and a BAM file as input and prints |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
9 all tags of reads that carry the mutation to a user specified output file. |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
10 Creates fastq file of reads of tags with mutation. |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
11 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
12 ======= ========== ================= ================================ |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
13 Version Date Author Description |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
14 2.0.0 2020-10-30 Gundula Povysil - |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
15 ======= ========== ================= ================================ |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
16 |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
17 USAGE: python mut2read.py DCS_Mutations.tabular DCS.bam Aligned_Families.tabular Interesting_Reads.fastq tag_count_dict.json |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
18 """ |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
19 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
20 import argparse |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
21 import json |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
22 import os |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
23 import sys |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
24 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
25 import numpy as np |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
26 import pysam |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
27 from cyvcf2 import VCF |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
28 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
29 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
30 def make_argparser(): |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
31 parser = argparse.ArgumentParser(description='Takes a vcf file with mutations and a BAM file as input and prints all tags of reads that carry the mutation to a user specified output file and creates a fastq file of reads of tags with mutation.') |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
32 parser.add_argument('--mutFile', |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
33 help='VCF file with DCS mutations.') |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
34 parser.add_argument('--bamFile', |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
35 help='BAM file with aligned DCS reads.') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
36 parser.add_argument('--familiesFile', |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
37 help='TABULAR file with aligned families.') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
38 parser.add_argument('--outputFastq', |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
39 help='Output FASTQ file of reads with mutations.') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
40 parser.add_argument('--outputJson', |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
41 help='Output JSON file to store collected data.') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
42 return parser |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
43 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
44 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
45 def mut2read(argv): |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
46 parser = make_argparser() |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
47 args = parser.parse_args(argv[1:]) |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
48 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
49 file1 = args.mutFile |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
50 file2 = args.bamFile |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
51 file3 = args.familiesFile |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
52 outfile = args.outputFastq |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
53 json_file = args.outputJson |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
54 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
55 if os.path.isfile(file1) is False: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
56 sys.exit("Error: Could not find '{}'".format(file1)) |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
57 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
58 if os.path.isfile(file2) is False: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
59 sys.exit("Error: Could not find '{}'".format(file2)) |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
60 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
61 if os.path.isfile(file3) is False: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
62 sys.exit("Error: Could not find '{}'".format(file3)) |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
63 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
64 # read dcs bam file |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
65 bam = pysam.AlignmentFile(file2, "rb") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
66 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
67 # get tags |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
68 tag_dict = {} |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
69 cvrg_dict = {} |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
70 |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
71 for variant in VCF(file1): |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
72 chrom = variant.CHROM |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
73 stop_pos = variant.start |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
74 chrom_stop_pos = str(chrom) + "#" + str(stop_pos) |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
75 ref = variant.REF |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
76 alt = variant.ALT[0] |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
77 dcs_len = [] |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
78 if len(ref) == len(alt): |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
79 for pileupcolumn in bam.pileup(chrom, stop_pos - 1, stop_pos + 1, max_depth=100000000): |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
80 if pileupcolumn.reference_pos == stop_pos: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
81 count_alt = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
82 count_ref = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
83 count_indel = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
84 count_n = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
85 count_other = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
86 count_lowq = 0 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
87 print("unfiltered reads=", pileupcolumn.n, "filtered reads=", len(pileupcolumn.pileups), |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
88 "difference= ", len(pileupcolumn.pileups) - pileupcolumn.n) |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
89 for pileupread in pileupcolumn.pileups: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
90 if not pileupread.is_del and not pileupread.is_refskip: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
91 # query position is None if is_del or is_refskip is set. |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
92 nuc = pileupread.alignment.query_sequence[pileupread.query_position] |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
93 dcs_len.append(len(pileupread.alignment.query_sequence)) |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
94 if nuc == alt: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
95 count_alt += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
96 tag = pileupread.alignment.query_name |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
97 if tag in tag_dict: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
98 tag_dict[tag][chrom_stop_pos] = alt |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
99 else: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
100 tag_dict[tag] = {} |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
101 tag_dict[tag][chrom_stop_pos] = alt |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
102 elif nuc == ref: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
103 count_ref += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
104 elif nuc == "N": |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
105 count_n += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
106 elif nuc == "lowQ": |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
107 count_lowq += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
108 else: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
109 count_other += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
110 else: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
111 count_indel += 1 |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
112 dcs_median = np.median(np.array(dcs_len)) |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
113 cvrg_dict[chrom_stop_pos] = (count_ref, count_alt, dcs_median) |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
114 |
2
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
115 print("coverage at pos %s = %s, ref = %s, alt = %s, other bases = %s, N = %s, indel = %s, low quality = %s, median length of DCS = %s\n" % |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
116 (pileupcolumn.pos, count_ref + count_alt, count_ref, count_alt, count_other, count_n, |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
117 count_indel, count_lowq, dcs_median)) |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
118 else: |
3f1dbd2c59bf
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit f492e9717cb946f0eb5689cd7b6eb8067abf6468"
iuc
parents:
1
diff
changeset
|
119 print("indels are currently not evaluated") |
0
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
120 bam.close() |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
121 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
122 with open(json_file, "w") as f: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
123 json.dump((tag_dict, cvrg_dict), f) |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
124 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
125 # create fastq from aligned reads |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
126 with open(outfile, 'w') as out: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
127 with open(file3, 'r') as families: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
128 for line in families: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
129 line = line.rstrip('\n') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
130 splits = line.split('\t') |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
131 tag = splits[0] |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
132 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
133 if tag in tag_dict: |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
134 str1 = splits[4] |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
135 curr_seq = str1.replace("-", "") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
136 str2 = splits[5] |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
137 curr_qual = str2.replace(" ", "") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
138 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
139 out.write("@" + splits[0] + "." + splits[1] + "." + splits[2] + "\n") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
140 out.write(curr_seq + "\n") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
141 out.write("+" + "\n") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
142 out.write(curr_qual + "\n") |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
143 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
144 |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
145 if __name__ == '__main__': |
8d29173d49a9
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 5a438f76d0ecb6478f82dae6b9596bc7f5a4f4e8"
iuc
parents:
diff
changeset
|
146 sys.exit(mut2read(sys.argv)) |