comparison cnv-sim.py @ 13:e0f5a71e94ed draft

Uploaded
author ahosny
date Wed, 07 Sep 2016 09:32:24 -0400
parents 4a4d2b78eb55
children
comparison
equal deleted inserted replaced
12:12eb1e77bcfa 13:e0f5a71e94ed
9 9
10 from cnvsim.fileio import * 10 from cnvsim.fileio import *
11 from cnvsim.exome_simulator import * 11 from cnvsim.exome_simulator import *
12 from cnvsim.genome_simulator import * 12 from cnvsim.genome_simulator import *
13 13
14 class CapitalisedHelpFormatter(argparse.HelpFormatter):
15 def add_usage(self, usage, actions, groups, prefix=None):
16 if prefix is None:
17 prefix = 'Usage: '
18 return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix)
19
14 def log(message): 20 def log(message):
15 print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message 21 print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message
16 22
23
17 def main(): 24 def main():
18 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 25 parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter, \
26 description='Generates NGS short reads that encompass copy number variations in whole genome and targeted exome sequencing')
27 parser._positionals.title = 'Positional arguments'
28 parser._optionals.title = 'Optional arguments'
29 parser.add_argument('-v', '--version', action='version', version = 'CNV-Sim v0.9.2', help = "Show program's version number and exit.")
30
19 parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \ 31 parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \
20 help="simulate copy number variations in whole genome or exome regions") 32 help="simulate copy number variations in whole genome or exome regions")
21 parser.add_argument("genome", type=file, \ 33 parser.add_argument("genome", type=file, \
22 help="path to the referece genome file in FASTA format ") 34 help="path to the referece genome file in FASTA format ")
23 parser.add_argument("target", type=file, nargs='?', default=None, \ 35 parser.add_argument("target", type=file, nargs='?', default=None, \
24 help="path to the target regions file in BED format (if using exome)") 36 help="path to the target regions file in BED format (if using exome)")
25 37
26 parser.add_argument("-o", "--output_dir_name",type=str, default="test", \ 38 parser.add_argument("-o", "--output_dir_name",type=str, default="simulation_output", \
27 help="a name to be used to create the output directory (overrides existing directory with the same name).") 39 help="a name to be used to create the output directory (overrides existing directory with the same name).")
28 parser.add_argument("-n", "--n_reads", type=int, default=10000, \ 40 parser.add_argument("-n", "--n_reads", type=int, default=10000, \
29 help="total number of reads without variations") 41 help="total number of reads without variations")
30 parser.add_argument("-l", "--read_length", type=int, default=100, \ 42 parser.add_argument("-l", "--read_length", type=int, default=100, \
31 help="read length (bp)") 43 help="read length (bp)")
32 parser.add_argument("--cnv_list", type=file, default=None, \ 44 parser.add_argument("--cnv_list", type=file, default=None, \
33 help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below") 45 help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")
34 46
35 cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed") 47 cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed")
36 cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30, \ 48 cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=20, \
37 help="number of CNV regions to be randomly generated") 49 help="number of CNV regions to be generated randomly")
38 cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30, \ 50 cnv_sim_group.add_argument("-r_min", "--region_minimum_length", type=int, default=1000, \
51 help="minimum length of each CNV region")
52 cnv_sim_group.add_argument("-r_max", "--region_maximum_length", type=int, default=100000, \
53 help="maximum length of each CNV region")
54 cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.50, \
39 help="percentage of amplifications in range [0.0: 1.0].") 55 help="percentage of amplifications in range [0.0: 1.0].")
40 cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20, \ 56 cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.50, \
41 help="percentage of deletions in range [0.0: 1.0].") 57 help="percentage of deletions in range [0.0: 1.0].")
42 cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3, \ 58 cnv_sim_group.add_argument("-cn_min", "--copy_number_minimum", type=float, default=3, \
43 help="minimum number of amplifications/deletions introduced") 59 help="minimum level of variations (copy number) introduced")
44 cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10, \ 60 cnv_sim_group.add_argument("-cn_max", "--copy_number_maximum", type=float, default=10, \
45 help="maximum number of amplifications/deletions introduced") 61 help="maximum level of variation (copy number) introduced")
46 62
47 args = parser.parse_args() 63 args = parser.parse_args()
48 64
49 simulation_parameters = {} 65 simulation_parameters = {}
50 simulation_parameters['type'] = args.simulation_type 66 simulation_parameters['type'] = args.simulation_type
62 simulation_parameters['cnv_list_file'] = None 78 simulation_parameters['cnv_list_file'] = None
63 simulation_parameters['tmp_dir'] = os.path.join(os.getcwd(), args.output_dir_name , "tmp") 79 simulation_parameters['tmp_dir'] = os.path.join(os.getcwd(), args.output_dir_name , "tmp")
64 80
65 cnv_list_parameters = {} 81 cnv_list_parameters = {}
66 cnv_list_parameters['regions_count'] = args.regions_count 82 cnv_list_parameters['regions_count'] = args.regions_count
83 cnv_list_parameters['minimum_length'] = args.region_minimum_length
84 cnv_list_parameters['maximum_length'] = args.region_maximum_length
67 cnv_list_parameters['amplifications'] = args.amplifications 85 cnv_list_parameters['amplifications'] = args.amplifications
68 cnv_list_parameters['deletions'] = args.deletions 86 cnv_list_parameters['deletions'] = args.deletions
69 cnv_list_parameters['minimum_variations'] = args.minimum 87 cnv_list_parameters['minimum_variations'] = args.copy_number_minimum
70 cnv_list_parameters['maximum_variations'] = args.maximum 88 cnv_list_parameters['maximum_variations'] = args.copy_number_maximum
89
90 if cnv_list_parameters['amplifications'] + cnv_list_parameters['deletions'] != 1.0:
91 log("ERROR: percentage of amplifications + percentage of deletions must be equal to 1.0")
92 exit()
71 93
72 if simulation_parameters['type'] == 'genome': 94 if simulation_parameters['type'] == 'genome':
73 simulate_genome_cnv(simulation_parameters, cnv_list_parameters) 95 simulate_genome_cnv(simulation_parameters, cnv_list_parameters)
74 else: 96 else:
75 simulate_exome_cnv(simulation_parameters, cnv_list_parameters) 97 simulate_exome_cnv(simulation_parameters, cnv_list_parameters)