comparison bcftools.py @ 0:3830d29fca6a draft

Uploaded
author jaredgk
date Mon, 15 Oct 2018 18:15:47 -0400
parents
children 86a9d8d5b291
comparison
equal deleted inserted replaced
-1:000000000000 0:3830d29fca6a
1 import os
2 import sys
3 import logging
4 import subprocess
5
6 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared')))
7
8 from vcf_reader_func import checkFormat
9
def check_bcftools_for_errors(bcftools_stderr):
    '''
    Checks the bcftools stderr for errors

    Any non-empty stderr is currently treated as an error: it is logged
    and then raised.

    Parameters
    ----------
    bcftools_stderr : str or bytes
        bcftools stderr. Bytes (as produced by a subprocess pipe that
        was opened without text decoding) are decoded so the log entry
        and the exception carry readable text.

    Raises
    ------
    Exception
        If bcftools wrote anything to stderr
    '''

    # Nothing on stderr (empty or None) means there is nothing to report
    if not bcftools_stderr:
        return

    # Undecoded pipes yield bytes; decode so the message is not a bytes repr
    if isinstance(bcftools_stderr, bytes):
        bcftools_stderr = bcftools_stderr.decode('utf-8', 'replace')

    # Expand as errors are discovered
    logging.error(bcftools_stderr)
    raise Exception(bcftools_stderr)
29
def call_bcftools(bcftools_call_args):
    '''
    Runs bcftools with the given arguments and checks its stderr

    Parameters
    ----------
    bcftools_call_args : list
        Arguments placed after the `bcftools` executable name; each one
        is converted with str() before the call
    '''

    # Full command line: the executable followed by the stringified arguments
    command = ['bcftools'] + [str(arg) for arg in bcftools_call_args]

    # Launch bcftools with both output streams captured
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Block until bcftools exits; the captured stdout is not used
    _, captured_stderr = process.communicate()

    # Hand stderr to the shared error check
    check_bcftools_for_errors(captured_stderr)

    logging.info('bcftools call complete')
41
def check_for_index(filename):
    '''
    Reports whether an index file sits next to the given file

    Parameters
    ----------
    filename : str
        Path of the file whose index is looked for

    Returns
    -------
    bool
        True if `filename` + '.tbi' exists for a 'bgzip' file, or
        `filename` + '.csi' exists for a 'bcf' file; False otherwise,
        including for any other format reported by checkFormat
    '''

    detected_format = checkFormat(filename)

    # Pick the index extension that goes with the detected format
    if detected_format == 'bgzip':
        index_suffix = '.tbi'
    elif detected_format == 'bcf':
        index_suffix = '.csi'
    else:
        # Any other format is reported as having no index
        return False

    return os.path.isfile(filename + index_suffix)
61
def create_index(filename):
    '''
    Creates an index for the given file using `bcftools index`

    Parameters
    ----------
    filename : str
        Path of the file to index

    Raises
    ------
    Exception
        If checkFormat reports a format other than 'bgzip' or 'bcf'
    '''

    detected_format = checkFormat(filename)

    # Choose the bcftools index flag: -t for vcf.gz (.tbi), -c for bcf (.csi)
    if detected_format == 'bgzip':
        index_flag = '-t'
    elif detected_format == 'bcf':
        index_flag = '-c'
    else:
        # Report if file cannot be indexed
        raise Exception('Error creating index for: %s. Only .bcf and .vcf.gz (bgzip) files are supported.' % filename)

    call_bcftools(['index', index_flag, filename])
80
def convert_to_bcf(filename, output_prefix):
    '''
    Converts the given file to BCF via bcftools

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.bcf' is appended to it
    '''

    # Output path derived from the requested prefix
    destination = '%s.bcf' % output_prefix

    # convert, BCF output type (-O b), output path (-o), then the input file
    call_bcftools(['convert', '-O', 'b', '-o', destination, filename])
97
98
def convert_to_vcf(filename, output_prefix):
    '''
    Converts the given file to VCF via bcftools

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.vcf' is appended to it
    '''

    # Output path derived from the requested prefix
    destination = '%s.vcf' % output_prefix

    # view, VCF output type (-O v), output path (-o), then the input file
    call_bcftools(['view', '-O', 'v', '-o', destination, filename])
115
def convert_to_vcfgz(filename, output_prefix):
    '''
    Converts the given file to VCFGZ via bcftools

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.vcf.gz' is appended to it
    '''

    # Output path derived from the requested prefix
    destination = '%s.vcf.gz' % output_prefix

    # view, VCFGZ output type (-O z), output path (-o), then the input file
    call_bcftools(['view', '-O', 'z', '-o', destination, filename])