0
|
1 import os
|
|
2 import sys
|
|
3 import logging
|
|
4 import subprocess
|
|
5
|
|
6 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared')))
|
|
7
|
|
8 from vcf_reader_func import checkFormat
|
|
9
|
|
def check_bcftools_for_errors (bcftools_stderr):
    '''
    Checks the bcftools stderr for errors

    Any non-empty stderr is currently treated as a failure: it is
    logged and then raised as an exception.

    Parameters
    ----------
    bcftools_stderr : str or bytes
        bcftools stderr, as captured from the subprocess

    Raises
    ------
    Exception
        If bcftools wrote anything to stderr
    '''

    # Empty (or missing) stderr means bcftools reported no problem
    if not bcftools_stderr:
        return

    # Popen.communicate() yields bytes under Python 3; decode so the log
    # entry and the exception carry readable text instead of a b'...' repr
    if isinstance(bcftools_stderr, bytes):
        bcftools_stderr = bcftools_stderr.decode('utf-8', 'replace')

    # Expand as errors are discovered
    logging.error(bcftools_stderr)
    raise Exception(bcftools_stderr)
|
|
29
|
|
def call_bcftools (bcftools_call_args):
    '''
    Runs bcftools with the given arguments and waits for it to finish

    Parameters
    ----------
    bcftools_call_args : list
        Arguments placed after the `bcftools` executable name; each one
        is converted to a string before the call

    Notes
    -----
    The captured stderr is handed to check_bcftools_for_errors, so any
    exception raised there propagates to the caller. The captured
    stdout is not used.
    '''

    # Full command line: executable followed by the stringified arguments
    command = ['bcftools'] + [str(arg) for arg in bcftools_call_args]

    # Launch bcftools with both output streams captured
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # communicate() blocks until bcftools exits
    _, stderr_data = process.communicate()

    check_bcftools_for_errors(stderr_data)

    logging.info('bcftools call complete')
|
|
41
|
|
def check_for_index (filename):
    '''
    Reports whether an index file exists alongside the given file

    Parameters
    ----------
    filename : str
        Path of the file whose index is being looked for

    Returns
    -------
    bool
        True if the format is 'bgzip' and `filename + '.tbi'` is a file,
        or the format is 'bcf' and `filename + '.csi'` is a file;
        False otherwise (including any other format)
    '''

    # Format as reported by checkFormat
    detected_format = checkFormat(filename)

    # Pick the index path that matches the format
    if detected_format == 'bgzip':
        # vcf.gz files are expected to have a .tbi index
        index_path = filename + '.tbi'
    elif detected_format == 'bcf':
        # bcf files are expected to have a .csi index
        index_path = filename + '.csi'
    else:
        # No index is looked for with any other format
        return False

    return os.path.isfile(index_path)
|
|
61
|
|
def create_index (filename):
    '''
    Creates an index for the given file using `bcftools index`

    Parameters
    ----------
    filename : str
        Path of the file to index

    Raises
    ------
    Exception
        If the format reported by checkFormat is neither 'bgzip'
        nor 'bcf'
    '''

    # Format as reported by checkFormat
    detected_format = checkFormat(filename)

    # Report if file cannot be indexed
    if detected_format not in ('bgzip', 'bcf'):
        raise Exception('Error creating index for: %s. Only .bcf and .vcf.gz (bgzip) files are supported.' % filename)

    # '-t' requests a .tbi index (vcf.gz), '-c' a .csi index (bcf)
    index_flag = '-t' if detected_format == 'bgzip' else '-c'

    call_bcftools(['index', index_flag, filename])
|
|
80
|
|
def convert_to_bcf (filename, output_prefix):
    '''
    Converts the given file to BCF via `bcftools convert -O b`

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.bcf' is appended to it
    '''

    # Output path is the prefix plus the BCF extension
    bcf_output = '%s.bcf' % output_prefix

    # Argument order: subcommand, output type, output file, input file
    call_bcftools(['convert', '-O', 'b', '-o', bcf_output, filename])
|
|
97
|
|
98
|
|
def convert_to_vcf (filename, output_prefix):
    '''
    Converts the given file to VCF via `bcftools view -O v`

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.vcf' is appended to it
    '''

    # Output path is the prefix plus the VCF extension
    vcf_output = '%s.vcf' % output_prefix

    # Output-type options first, then the output file, then the input
    view_args = ['view', '-O', 'v']
    view_args += ['-o', vcf_output, filename]

    # Call bcftools
    call_bcftools(view_args)
|
|
115
|
|
def convert_to_vcfgz (filename, output_prefix):
    '''
    Converts the given file to VCFGZ via `bcftools view -O z`

    Parameters
    ----------
    filename : str
        Path of the input file
    output_prefix : str
        Prefix of the output file; '.vcf.gz' is appended to it
    '''

    # Output path is the prefix plus the VCFGZ extension
    vcfgz_output = '%s.vcf.gz' % output_prefix

    # Pieces of the command, joined in the order bcftools receives them
    output_type_args = ['view', '-O', 'z']
    output_file_args = ['-o', vcfgz_output]

    # Call bcftools
    call_bcftools(output_type_args + output_file_args + [filename])
|