annotate cnvsim/genome_simulator.py @ 18:eca72016b5b3 draft default tip

Uploaded
author ahosny
date Wed, 07 Sep 2016 09:37:49 -0400
parents 63955244b2fa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
1 __author__ = 'Abdelrahman Hosny'
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
2
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
3 import random
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
4 import os
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
5 import subprocess
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
6 import datetime
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
7 import shutil
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
8
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
9 from . import fileio
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
10
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
11
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
12 def _log(message):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
13 print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
14
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
15
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
16 def getScriptPath():
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
17 return os.path.dirname(os.path.realpath(__file__))
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
18
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
19
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
20 def _generateCNVMask(mask_length, p_amplify, p_delete, min_variation, max_variation):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
21 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
22 This function generates random Copy Number Variations mask list
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
23 :param exons: list of regions.
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
24 :param p_amplify: percentage of amplifications introduced
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
25 :param p_delete: percentage of deletions introduced
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
26 :return: a list of the same length as the exons list. each list item
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
27 indicates the variation to be added to the exon in the same position.
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
28 Positive number: number of amplification
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
29 Zero: no change
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
30 -1: delete this exon
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
31 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
32
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
33 number_of_amplifications = int(p_amplify * mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
34 number_of_deletions = int(p_delete * mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
35 cnv_mask = [0] * mask_length
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
36
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
37 # generate CNV mask (a list of amplifications and deletions to be applied to the exons)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
38 while number_of_amplifications > 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
39 choice = random.randrange(0, mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
40 while cnv_mask[choice] != 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
41 choice = random.randrange(0, mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
42 cnv_mask[choice] = random.randrange(min_variation, max_variation) # random amplifications in the range [min_variation, max_variation)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
43 number_of_amplifications -= 1
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
44 random.shuffle(cnv_mask)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
45 while number_of_deletions > 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
46 choice = random.randrange(0, mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
47 while cnv_mask[choice] != 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
48 choice = random.randrange(0, mask_length)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
49 cnv_mask[choice] = -1*random.randrange(min_variation, max_variation) # random deletions in the range [min_variation, max_variation)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
50 number_of_deletions -= 1
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
51 random.shuffle(cnv_mask)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
52
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
53 return cnv_mask
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
54
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
55
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
56 def _generateCNVList(chromosome_length, number_of_regions, p_amplify, p_delete, min_variation, max_variation, region_min_length, region_max_length):
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
57 '''
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
58 This function generates a CNV list to be used for simulating the CNVs in the next step
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
59 :param chromosome_length: the length of the simulated chromosome
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
60 :param number_of_regions: the number of regions to be affected by CNVs
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
61 :param p_amplify: percentage of amplification
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
62 :param p_delete: percentage of deletions
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
63 :param min_variation: minimum level of variation
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
64 :param max_variation: maximum level of variation
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
65 :param region_min_length: minimum length of a CNV region
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
66 :param region_max_length: maximum length of a CNV region
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
67 :return: CNV list
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
68 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
69 region_length = chromosome_length / number_of_regions
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
70 start = 0
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
71 end = region_length
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
72 cnv_list = []
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
73 mask = _generateCNVMask(number_of_regions, p_amplify, p_delete, min_variation, max_variation)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
74 for i in range(number_of_regions):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
75 # jump forward start
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
76 jump_start = start + int(random.randrange(int(0.40*region_length), int(0.45*region_length)))
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
77
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
78 # jump backward end
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
79 jump_end = jump_start + random.randint(region_min_length, region_max_length) - 1
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
80
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
81 cnv_list.append([jump_start, jump_end, mask[i]])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
82 start = end
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
83 end += region_length
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
84
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
85 return cnv_list
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
86
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
87
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
88 def _simulateCNV(genome, cnv_list, read_length):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
89 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
90 Simulate the control genome and the CNV genome
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
91 :param genome: original genome sequence
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
92 :param cnv_list: a list of region variations (chromosome, start, end, variation)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
93 :return: control_genome, cnv_genome
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
94 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
95 control_genome = []
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
96 cnv_genome = []
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
97
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
98 for cnv in cnv_list:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
99 start = cnv[1]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
100 end = cnv[2]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
101 variation = cnv[3]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
102 sequence = genome[start: end]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
103
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
104 if variation > 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
105 # amplification
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
106 amplification = genome[start-read_length: start] + sequence * variation + genome[end:end+read_length]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
107 cnv_genome.append(amplification)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
108 elif variation < 0:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
109 # deletion
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
110 deletion = genome[start-read_length: start] + sequence * abs(variation) + genome[end:end+read_length]
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
111 control_genome.append(deletion)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
112
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
113 return ''.join(control_genome), ''.join(cnv_genome)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
114
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
115
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
116 def _callART(genome_file, output_file, read_length, fold_coverage=1):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
117 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
118 Calls Wessim to generate artificial reads for the targets on the reference genome
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
119 :param genome_file: reference genome file in FASTA format
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
120 :param output_file: output file name
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
121 :param read_length: the read length
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
122 :param fold_coverage: fold coverage for the reads
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
123 :return: None
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
124 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
125 os.chdir(os.path.join(getScriptPath(), "ART"))
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
126 subprocess.call(["./art_illumina", \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
127 "-sam", \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
128 "-i", genome_file, \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
129 "-p", \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
130 "-m", "200", \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
131 "-s", "10", \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
132 "-l", str(read_length), \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
133 "-f", str(fold_coverage), \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
134 "-o", output_file], stderr=None)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
135 os.chdir("..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
136
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
137
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
138 def simulate_genome_cnv(simulation_parameters, cnv_list_parameters=None):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
139 '''
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
140 Simulate copy number variations on the passed reference genome based on the given simulation parameters
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
141 :param simulation_parameters: a dictionary containing parameters for simulation
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
142 :param cnv_list_parameters: a dictionary containing parameters for CNV List creation
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
143 :return: None
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
144 '''
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
145 _log('simulation type: whole genome sequencing')
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
146
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
147 # create a temporary directory for intermediate files
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
148 if not os.path.exists(simulation_parameters['tmp_dir']):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
149 os.makedirs(simulation_parameters['tmp_dir'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
150
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
151 # create output directory for final results
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
152 if not os.path.exists(simulation_parameters['output_dir']):
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
153 os.makedirs(simulation_parameters['output_dir'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
154
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
155 # copy genome to the tmp folder
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
156 shutil.copyfile(simulation_parameters['genome_file'], os.path.join(simulation_parameters['tmp_dir'], "reference.fa"))
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
157 genome_file = os.path.join(simulation_parameters['tmp_dir'], "reference.fa")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
158
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
159 # initialize variables for temporary files
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
160 control_genome_file = os.path.join(simulation_parameters['tmp_dir'], "ControlGenome.fa")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
161 cnv_genome_file = os.path.join(simulation_parameters['tmp_dir'], "CNVGenome.fa")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
162 base_reads_file = os.path.join(simulation_parameters['tmp_dir'], "base")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
163 control_reads_file = os.path.join(simulation_parameters['tmp_dir'], "control")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
164 cnv_reads_file = os.path.join(simulation_parameters['tmp_dir'], "cnv")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
165
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
166 _log("loading genome file ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
167 header, genome = fileio.readGenome(genome_file)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
168 _log("successfully loaded a genome of length " + `len(genome)`)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
169
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
170 if simulation_parameters['cnv_list_file'] is None:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
171 # CNV list file
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
172 cnv_list_file = os.path.join(simulation_parameters['output_dir'], "copynumber.bed")
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
173
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
174 _log("generating CNV list ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
175 cnv_list = _generateCNVList(len(genome), cnv_list_parameters['regions_count'], \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
176 cnv_list_parameters['amplifications'], cnv_list_parameters['deletions'], \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
177 cnv_list_parameters['minimum_variations'], \
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
178 cnv_list_parameters['maximum_variations'], \
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
179 cnv_list_parameters['minimum_length'], \
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
180 cnv_list_parameters['maximum_length'])
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
181 cnv_list = map(lambda l: [header.replace('>', '')] + l, cnv_list)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
182
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
183 with open(cnv_list_file, 'w') as f:
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
184 line = 'chrom\tchr_start\tchrom_stop\tnum_positions\tcopy_number\n'
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
185 f.write(line)
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
186 for i, cnv_region in enumerate(cnv_list):
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
187 num_positions = cnv_region[2] - cnv_region[1] + 1
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
188 line = cnv_region[0] + '\t' \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
189 + `cnv_region[1]` + '\t' \
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
190 + `cnv_region[2]` + '\t' \
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
191 + `num_positions` + '\t' \
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
192 + `cnv_region[3]` + '\n'
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
193 f.write(line)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
194 _log("randomly generated CNV list saved to " + cnv_list_file)
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
195
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
196 else:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
197 _log("loading CNV list ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
198 with open(simulation_parameters['cnv_list_file'], "r") as f:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
199 cnv_list = []
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
200 lines = f.readlines()
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
201 lines.pop(0)
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
202 for line in lines:
18
eca72016b5b3 Uploaded
ahosny
parents: 5
diff changeset
203 chromosome, region_start, region_end, num_positions, variation = line.strip().split("\t")
5
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
204 cnv_list.append((chromosome, int(region_start), int(region_end), int(variation)))
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
205
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
206 _log("successfully loaded CNV list that contains " + `len(lines)` + " regions ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
207
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
208 # call ART to generate reads from the genome file
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
209 _log("generating reads for the genome ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
210 _log("delegating job to ART ...")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
211 _callART(genome_file, base_reads_file, simulation_parameters['read_length'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
212
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
213 _log("simulating copy number variations (amplifications/deletions)")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
214 control_genome, cnv_genome = _simulateCNV(genome, cnv_list, simulation_parameters['read_length'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
215
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
216 _log("saving to the control genome file ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
217
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
218 with open(control_genome_file, 'w') as fw:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
219 fw.write(header + "\n")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
220 n = 50
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
221 l = [control_genome[i:i + n] for i in range(0, len(control_genome), n)]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
222 for line in l:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
223 fw.write(line + "\n")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
224
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
225 _log("delegating job to ART ...")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
226 _callART(control_genome_file, control_reads_file, simulation_parameters['read_length'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
227
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
228 _log("saving to the CNV genome file ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
229 with open(cnv_genome_file, 'w') as fw:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
230 fw.write(header + "\n")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
231 n = 50
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
232 l = [cnv_genome[i:i + n] for i in range(0, len(cnv_genome), n)]
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
233 for line in l:
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
234 fw.write(line + "\n")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
235
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
236 _log("delegating job to ART ...")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
237 _callART(cnv_genome_file, cnv_reads_file, simulation_parameters['read_length'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
238
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
239 _log("merging results ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
240 fileio.mergeARTReads(simulation_parameters['tmp_dir'], simulation_parameters['output_dir'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
241
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
242 _log("cleaning temporary files ..")
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
243 fileio.clean(simulation_parameters['tmp_dir'])
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
244
63955244b2fa Uploaded cnvsim library package
ahosny
parents:
diff changeset
245 _log("simulation completed. find results in " + simulation_parameters['output_dir'])